Skip to content

Commit

Permalink
Merge branch 'master' into fix-bq-ingest-nested-descriptions
Browse files Browse the repository at this point in the history
  • Loading branch information
maaaikoool authored Oct 25, 2023
2 parents 28b29f7 + 9cccd22 commit 92691b9
Show file tree
Hide file tree
Showing 506 changed files with 15,299 additions and 5,719 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/airflow-plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ on:
- "metadata-models/**"
pull_request:
branches:
- master
- "**"
paths:
- ".github/**"
- "metadata-ingestion-modules/airflow-plugin/**"
Expand Down
25 changes: 13 additions & 12 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ on:
- "**.md"
pull_request:
branches:
- master
- "**"
paths-ignore:
- "docs/**"
- "**.md"
Expand All @@ -24,17 +24,12 @@ jobs:
strategy:
fail-fast: false
matrix:
command:
[
command: [
# metadata-ingestion and airflow-plugin each have dedicated build jobs
"./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :metadata-ingestion-modules:airflow-plugin:check -x :datahub-frontend:build -x :datahub-web-react:build --parallel",
"./gradlew :datahub-frontend:build :datahub-web-react:build --parallel",
]
timezone:
[
"UTC",
"America/New_York",
"except_metadata_ingestion",
"frontend"
]
timezone: ["UTC", "America/New_York"]
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
Expand All @@ -53,9 +48,15 @@ jobs:
with:
python-version: "3.10"
cache: pip
- name: Gradle build (and test)
- name: Gradle build (and test) for metadata ingestion
# we only need the timezone runs for frontend tests
if: ${{ matrix.command == 'except_metadata_ingestion' && matrix.timezone == 'America/New_York' }}
run: |
./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :metadata-ingestion-modules:airflow-plugin:check -x :datahub-frontend:build -x :datahub-web-react:build --parallel
- name: Gradle build (and test) for frontend
if: ${{ matrix.command == 'frontend' }}
run: |
${{ matrix.command }}
./gradlew :datahub-frontend:build :datahub-web-react:build --parallel
env:
NODE_OPTIONS: "--max-old-space-size=3072"
- uses: actions/upload-artifact@v3
Expand Down
9 changes: 2 additions & 7 deletions .github/workflows/check-datahub-jars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ on:
- "**.md"
pull_request:
branches:
- master
- "**"
paths-ignore:
- "docker/**"
- "docs/**"
Expand All @@ -28,12 +28,7 @@ jobs:
max-parallel: 1
fail-fast: false
matrix:
command:
[
"datahub-client",
"datahub-protobuf",
"spark-lineage"
]
command: ["datahub-client", "datahub-protobuf", "spark-lineage"]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/close-stale-issues.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ jobs:
days-before-issue-stale: 30
days-before-issue-close: 30
stale-issue-label: "stale"
stale-issue-message: "This issue is stale because it has been open for 30 days with no activity. If you believe this is still an issue on the latest DataHub release please leave a comment with the version that you tested it with. If this is a question/discussion please head to https://slack.datahubproject.io. For feature requests please use https://feature-requests.datahubproject.io"
stale-issue-message:
"This issue is stale because it has been open for 30 days with no activity. If you believe this is still an issue on the latest DataHub release please leave a comment with the version that you tested it with. If this is a question/discussion please head to https://slack.datahubproject.io.\
\ For feature requests please use https://feature-requests.datahubproject.io"
close-issue-message: "This issue was closed because it has been inactive for 30 days since being marked as stale."
days-before-pr-stale: -1
days-before-pr-close: -1
Expand Down
13 changes: 4 additions & 9 deletions .github/workflows/code-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ on:
- ".github/workflows/code-checks.yml"
pull_request:
branches:
- master
- "**"
paths:
- "metadata-io/**"
- "datahub-web-react/**"
Expand All @@ -21,17 +21,12 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true


jobs:
code_check:
strategy:
fail-fast: false
matrix:
command:
[
"check_event_type.py",
"check_policies.py"
]
command: ["check_event_type.py", "check_policies.py"]
name: run code checks
runs-on: ubuntu-latest
steps:
Expand All @@ -43,5 +38,5 @@ jobs:
with:
python-version: "3.10"
- name: run check ${{ matrix.command }}
run: |
python .github/scripts/${{ matrix.command }}
run: |-
python .github/scripts/${{ matrix.command }}
3 changes: 1 addition & 2 deletions .github/workflows/docker-postgres-setup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ on:
- ".github/workflows/docker-postgres-setup.yml"
pull_request:
branches:
- master
- "**"
paths:
- "docker/postgres-setup/**"
- ".github/workflows/docker-postgres-setup.yml"
Expand Down Expand Up @@ -61,4 +61,3 @@ jobs:
context: .
file: ./docker/postgres-setup/Dockerfile
platforms: linux/amd64,linux/arm64

7 changes: 3 additions & 4 deletions .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ on:
- "**.md"
pull_request:
branches:
- master
- "**"
paths-ignore:
- "docs/**"
- "**.md"
Expand Down Expand Up @@ -545,7 +545,6 @@ jobs:
id: tag
run: echo "tag=${{ steps.filter.outputs.datahub-ingestion-base == 'true' && needs.setup.outputs.unique_full_tag || 'head' }}" >> $GITHUB_OUTPUT


datahub_ingestion_slim_build:
name: Build and Push DataHub Ingestion Docker Images
runs-on: ubuntu-latest
Expand Down Expand Up @@ -809,8 +808,8 @@ jobs:
DATAHUB_VERSION: ${{ needs.setup.outputs.unique_tag }}
DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_INGESTION_IMAGE }}
ACTIONS_VERSION: ${{ needs.datahub_ingestion_slim_build.outputs.tag }}
ACTIONS_EXTRA_PACKAGES: 'acryl-datahub-actions[executor]==0.0.13 acryl-datahub-actions==0.0.13 acryl-datahub==0.10.5'
ACTIONS_CONFIG: 'https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml'
ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor]==0.0.13 acryl-datahub-actions==0.0.13 acryl-datahub==0.10.5"
ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml"
run: |
./smoke-test/run-quickstart.sh
- name: sleep 60s
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: documentation
on:
pull_request:
branches:
- master
- "**"
push:
branches:
- master
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/lint-actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ name: Lint actions
on:
pull_request:
paths:
- '.github/workflows/**'
- ".github/workflows/**"

branches:
- "**"
jobs:
actionlint:
runs-on: ubuntu-latest
Expand Down
9 changes: 5 additions & 4 deletions .github/workflows/metadata-ingestion.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:
- "metadata-models/**"
pull_request:
branches:
- master
- "**"
paths:
- ".github/**"
- "metadata-ingestion/**"
Expand All @@ -34,7 +34,6 @@ jobs:
python-version: ["3.7", "3.10"]
command:
[
"lint",
"testQuick",
"testIntegrationBatch0",
"testIntegrationBatch1",
Expand All @@ -54,6 +53,9 @@ jobs:
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Install package
run: ./gradlew :metadata-ingestion:installPackageOnly
- name: Run lint alongwith testQuick
if: ${{ matrix.command == 'testQuick' }}
run: ./gradlew :metadata-ingestion:lint
- name: Run metadata-ingestion tests
run: ./gradlew :metadata-ingestion:${{ matrix.command }}
- name: Debug info
Expand All @@ -65,15 +67,14 @@ jobs:
docker image ls
docker system df
- uses: actions/upload-artifact@v3
if: ${{ always() && matrix.command != 'lint' }}
with:
name: Test Results (metadata ingestion ${{ matrix.python-version }})
path: |
**/build/reports/tests/test/**
**/build/test-results/test/**
**/junit.*.xml
- name: Upload coverage to Codecov
if: ${{ always() && matrix.python-version == '3.10' && matrix.command != 'lint' }}
if: ${{ always() && matrix.python-version == '3.10' }}
uses: codecov/codecov-action@v3
with:
token: ${{ secrets.CODECOV_TOKEN }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/metadata-io.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ on:
- "metadata-io/**"
pull_request:
branches:
- master
- "**"
paths:
- "**/*.gradle"
- "li-utils/**"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/spark-smoke-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ on:
- ".github/workflows/spark-smoke-test.yml"
pull_request:
branches:
- master
- "**"
paths:
- "metadata_models/**"
- "metadata-integration/java/datahub-client/**"
Expand Down
7 changes: 3 additions & 4 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ buildscript {
dependencies {
classpath 'com.linkedin.pegasus:gradle-plugins:' + pegasusVersion
classpath 'com.github.node-gradle:gradle-node-plugin:2.2.4'
classpath 'io.acryl.gradle.plugin:gradle-avro-plugin:0.8.1'
classpath 'io.acryl.gradle.plugin:gradle-avro-plugin:0.2.0'
classpath 'org.springframework.boot:spring-boot-gradle-plugin:' + springBootVersion
classpath "io.codearte.gradle.nexus:gradle-nexus-staging-plugin:0.30.0"
classpath "com.palantir.gradle.gitversion:gradle-git-version:3.0.0"
Expand Down Expand Up @@ -67,8 +67,8 @@ project.ext.externalDependency = [
'antlr4Runtime': 'org.antlr:antlr4-runtime:4.7.2',
'antlr4': 'org.antlr:antlr4:4.7.2',
'assertJ': 'org.assertj:assertj-core:3.11.1',
'avro_1_7': 'org.apache.avro:avro:1.7.7',
'avroCompiler_1_7': 'org.apache.avro:avro-compiler:1.7.7',
'avro': 'org.apache.avro:avro:1.11.3',
'avroCompiler': 'org.apache.avro:avro-compiler:1.11.3',
'awsGlueSchemaRegistrySerde': 'software.amazon.glue:schema-registry-serde:1.1.10',
'awsMskIamAuth': 'software.amazon.msk:aws-msk-iam-auth:1.1.1',
'awsSecretsManagerJdbc': 'com.amazonaws.secretsmanager:aws-secretsmanager-jdbc:1.0.8',
Expand Down Expand Up @@ -127,7 +127,6 @@ project.ext.externalDependency = [
'jgrapht': 'org.jgrapht:jgrapht-core:1.5.1',
'jna': 'net.java.dev.jna:jna:5.12.1',
'jsonPatch': 'com.github.java-json-tools:json-patch:1.13',
'jsonSchemaAvro': 'com.github.fge:json-schema-avro:0.1.4',
'jsonSimple': 'com.googlecode.json-simple:json-simple:1.1.1',
'jsonSmart': 'net.minidev:json-smart:2.4.9',
'json': 'org.json:json:20230227',
Expand Down
9 changes: 8 additions & 1 deletion buildSrc/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@ buildscript {
}

dependencies {
implementation('io.acryl:json-schema-avro:0.1.5') {
/**
* Forked version of abandoned repository: https://github.com/fge/json-schema-avro
* Maintainer last active 2014, we maintain an active fork of this repository to utilize mapping Avro schemas to Json Schemas,
* repository is as close to official library for this as you can get. Original maintainer is one of the authors of Json Schema spec.
* Other companies are also separately maintaining forks (like: https://github.com/java-json-tools/json-schema-avro).
* We have built several customizations on top of it for various bug fixes, especially around union scheams
*/
implementation('io.acryl:json-schema-avro:0.2.2') {
exclude group: 'com.fasterxml.jackson.core', module: 'jackson-databind'
exclude group: 'com.google.guava', module: 'guava'
}
Expand Down
4 changes: 2 additions & 2 deletions datahub-frontend/app/auth/AuthModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public class AuthModule extends AbstractModule {
* Pac4j Stores Session State in a browser-side cookie in encrypted fashion. This configuration
* value provides a stable encryption base from which to derive the encryption key.
*
* We hash this value (SHA1), then take the first 16 bytes as the AES key.
* We hash this value (SHA256), then take the first 16 bytes as the AES key.
*/
private static final String PAC4J_AES_KEY_BASE_CONF = "play.http.secret.key";
private static final String PAC4J_SESSIONSTORE_PROVIDER_CONF = "pac4j.sessionStore.provider";
Expand Down Expand Up @@ -93,7 +93,7 @@ protected void configure() {
// it to hex and slice the first 16 bytes, because AES key length must strictly
// have a specific length.
final String aesKeyBase = _configs.getString(PAC4J_AES_KEY_BASE_CONF);
final String aesKeyHash = DigestUtils.sha1Hex(aesKeyBase.getBytes(StandardCharsets.UTF_8));
final String aesKeyHash = DigestUtils.sha256Hex(aesKeyBase.getBytes(StandardCharsets.UTF_8));
final String aesEncryptionKey = aesKeyHash.substring(0, 16);
playCacheCookieStore = new PlayCookieSessionStore(
new ShiroAesDataEncrypter(aesEncryptionKey.getBytes()));
Expand Down
9 changes: 8 additions & 1 deletion datahub-frontend/app/auth/AuthUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ public class AuthUtils {
*/
public static final String SYSTEM_CLIENT_SECRET_CONFIG_PATH = "systemClientSecret";

/**
* Cookie name for redirect url that is manually separated from the session to reduce size
*/
public static final String REDIRECT_URL_COOKIE_NAME = "REDIRECT_URL";

public static final CorpuserUrn DEFAULT_ACTOR_URN = new CorpuserUrn("datahub");

public static final String LOGIN_ROUTE = "/login";
Expand Down Expand Up @@ -77,7 +82,9 @@ public static boolean isEligibleForForwarding(Http.Request req) {
* as well as their agreement to determine authentication status.
*/
public static boolean hasValidSessionCookie(final Http.Request req) {
return req.session().data().containsKey(ACTOR)
Map<String, String> sessionCookie = req.session().data();
return sessionCookie.containsKey(ACCESS_TOKEN)
&& sessionCookie.containsKey(ACTOR)
&& req.getCookie(ACTOR).isPresent()
&& req.session().data().get(ACTOR).equals(req.getCookie(ACTOR).get().value());
}
Expand Down
22 changes: 22 additions & 0 deletions datahub-frontend/app/auth/cookie/CustomCookiesModule.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package auth.cookie;

import com.google.inject.AbstractModule;
import play.api.libs.crypto.CookieSigner;
import play.api.libs.crypto.CookieSignerProvider;
import play.api.mvc.DefaultFlashCookieBaker;
import play.api.mvc.FlashCookieBaker;
import play.api.mvc.SessionCookieBaker;


public class CustomCookiesModule extends AbstractModule {

@Override
public void configure() {
bind(CookieSigner.class).toProvider(CookieSignerProvider.class);
// We override the session cookie baker to not use a fallback, this prevents using an old URL Encoded cookie
bind(SessionCookieBaker.class).to(CustomSessionCookieBaker.class);
// We don't care about flash cookies, we don't use them
bind(FlashCookieBaker.class).to(DefaultFlashCookieBaker.class);
}

}
25 changes: 25 additions & 0 deletions datahub-frontend/app/auth/cookie/CustomSessionCookieBaker.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package auth.cookie

import com.google.inject.Inject
import play.api.http.{SecretConfiguration, SessionConfiguration}
import play.api.libs.crypto.CookieSigner
import play.api.mvc.DefaultSessionCookieBaker

import scala.collection.immutable.Map

/**
* Overrides default fallback to URL Encoding behavior, prevents usage of old URL encoded session cookies
* @param config
* @param secretConfiguration
* @param cookieSigner
*/
class CustomSessionCookieBaker @Inject() (
override val config: SessionConfiguration,
override val secretConfiguration: SecretConfiguration,
cookieSigner: CookieSigner
) extends DefaultSessionCookieBaker(config, secretConfiguration, cookieSigner) {
// Has to be a Scala class because it extends a trait with concrete implementations, Scala does compilation tricks

// Forces use of jwt encoding and disallows fallback to legacy url encoding
override def decode(encodedData: String): Map[String, String] = jwtCodec.decode(encodedData)
}
Loading

0 comments on commit 92691b9

Please sign in to comment.