Skip to content

Commit

Permalink
Merge branch 'dagster_asset_dep_capture' of github.com:treff7es/datahub into dagster_asset_dep_capture
Browse files Browse the repository at this point in the history
  • Loading branch information
treff7es committed Aug 22, 2024
2 parents 21aa495 + b866f8b commit c80d9e5
Show file tree
Hide file tree
Showing 209 changed files with 24,487 additions and 5,092 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ jobs:
-x :metadata-ingestion-modules:airflow-plugin:check \
-x :metadata-ingestion-modules:dagster-plugin:build \
-x :metadata-ingestion-modules:dagster-plugin:check \
-x :metadata-ingestion-modules:gx-plugin:build \
-x :metadata-ingestion-modules:gx-plugin:check \
-x :datahub-frontend:build \
-x :datahub-web-react:build \
--parallel
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ jobs:
mysql_setup_change: ${{ steps.ci-optimize.outputs.mysql-setup-change == 'true' }}
postgres_setup_change: ${{ steps.ci-optimize.outputs.postgres-setup-change == 'true' }}
elasticsearch_setup_change: ${{ steps.ci-optimize.outputs.elasticsearch-setup-change == 'true' }}
smoke_test_change: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }}
steps:
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
Expand Down Expand Up @@ -813,7 +814,7 @@ jobs:
echo 'matrix=["cypress_suite1","cypress_rest"]' >> $GITHUB_OUTPUT
elif [ '${{ needs.setup.outputs.ingestion_only }}' == 'true' ]; then
echo 'matrix=["no_cypress_suite0","no_cypress_suite1"]' >> $GITHUB_OUTPUT
elif [ '${{ needs.setup.outputs.backend_change }}' == 'true' ]; then
elif [[ '${{ needs.setup.outputs.backend_change }}' == 'true' || '${{ needs.setup.outputs.smoke_test_change }}' == 'true' ]]; then
echo 'matrix=["no_cypress_suite0","no_cypress_suite1","cypress_suite1","cypress_rest"]' >> $GITHUB_OUTPUT
else
echo 'matrix=[]' >> $GITHUB_OUTPUT
Expand Down
87 changes: 87 additions & 0 deletions .github/workflows/gx-plugin.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
name: GX Plugin
on:
  push:
    branches:
      - master
    paths:
      - ".github/workflows/gx-plugin.yml"
      - "metadata-ingestion-modules/gx-plugin/**"
      - "metadata-ingestion/**"
      - "metadata-models/**"
  pull_request:
    branches:
      - master
    paths:
      - ".github/**"
      - "metadata-ingestion-modules/gx-plugin/**"
      - "metadata-ingestion/**"
      - "metadata-models/**"
  release:
    types: [published]

# Cancel superseded in-flight runs for the same PR / ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  gx-plugin:
    runs-on: ubuntu-latest
    env:
      # Quoted so generic YAML tooling does not re-type these as float/bool.
      SPARK_VERSION: "3.0.3"
      DATAHUB_TELEMETRY_ENABLED: "false"
    strategy:
      matrix:
        # "3.11" is listed explicitly. Previously it appeared only in the
        # `include` entry below; per GHA matrix semantics an include whose
        # values match no existing combination silently ADDS one — same job
        # set, but non-obvious to readers. Listing it here is equivalent
        # and self-documenting.
        python-version: ["3.8", "3.10", "3.11"]
        include:
          # Pin the great-expectations version range exercised per interpreter.
          - python-version: "3.8"
            extraPythonRequirement: "great-expectations~=0.15.12"
          - python-version: "3.10"
            extraPythonRequirement: "great-expectations~=0.16.0 numpy~=1.26.0"
          - python-version: "3.11"
            extraPythonRequirement: "great-expectations~=0.17.0"
      fail-fast: false
    steps:
      - name: Set up JDK 17
        uses: actions/setup-java@v3
        with:
          distribution: "zulu"
          java-version: 17
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"
      - name: Install dependencies
        run: ./metadata-ingestion/scripts/install_deps.sh
      - name: Install GX package and test (extras ${{ matrix.extraPythonRequirement }})
        run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:gx-plugin:lint :metadata-ingestion-modules:gx-plugin:testQuick
      - name: pip freeze show list installed
        if: always()
        run: source metadata-ingestion-modules/gx-plugin/venv/bin/activate && pip freeze
      - uses: actions/upload-artifact@v3
        # Publish test results only once, from the newest matrix entry.
        if: ${{ always() && matrix.python-version == '3.11' && matrix.extraPythonRequirement == 'great-expectations~=0.17.0' }}
        with:
          name: Test Results (GX Plugin ${{ matrix.python-version }})
          path: |
            **/build/reports/tests/test/**
            **/build/test-results/test/**
            **/junit.*.xml
      - name: Upload coverage to Codecov
        if: always()
        uses: codecov/codecov-action@v3
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          directory: .
          fail_ci_if_error: false
          flags: gx-${{ matrix.python-version }}-${{ matrix.extraPythonRequirement }}
          name: pytest-gx
          verbose: true

  # Uploads the raw event payload so the separate "Test Results" workflow
  # (triggered via workflow_run) can associate published results with this run.
  event-file:
    runs-on: ubuntu-latest
    steps:
      - name: Upload
        uses: actions/upload-artifact@v3
        with:
          name: Event File
          path: ${{ github.event_path }}
2 changes: 1 addition & 1 deletion .github/workflows/test-results.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Test Results

on:
workflow_run:
workflows: ["build & test", "metadata ingestion", "Airflow Plugin", "Dagster Plugin"]
workflows: ["build & test", "metadata ingestion", "Airflow Plugin", "Dagster Plugin", "GX Plugin"]
types:
- completed

Expand Down
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to
- [Peloton](https://www.onepeloton.com)
- [PITS Global Data Recovery Services](https://www.pitsdatarecovery.net/)
- [Razer](https://www.razer.com)
- [Rippling](https://www.rippling.com/)
- [Showroomprive](https://www.showroomprive.com/)
- [SpotHero](https://spothero.com)
- [Stash](https://www.stash.com)
Expand All @@ -153,6 +154,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to
- [Zynga](https://www.zynga.com)



## Select Articles & Talks

- [DataHub Blog](https://blog.datahubproject.io/)
Expand All @@ -173,6 +175,23 @@ Here are the companies that have officially adopted DataHub. Please feel free to

See the full list [here](docs/links.md).

## Security Notes

### Multi-Component

The DataHub project uses a wide range of code that is responsible for build automation and documentation generation, and
includes both service (i.e. GMS) and client (i.e. ingestion) components. When evaluating security vulnerabilities in
upstream dependencies, it is important to consider which component is affected and how it is used in the project. For
example, an upstream JavaScript library may include a Denial of Service (DoS) vulnerability; however, when it is used
only for generating documentation, it does not affect the running of DataHub itself and cannot be used to impact
DataHub's service. Similarly, Python dependencies for ingestion are part of the DataHub client and are not exposed as a service.

### Known False Positives

DataHub's ingestion client does not include credentials in the code repository, python package, or Docker images.
Upstream python dependencies may include files that look like credentials and are often misinterpreted as credentials
by automated scanners.

## License

[Apache License 2.0](./LICENSE).
Original file line number Diff line number Diff line change
Expand Up @@ -2728,19 +2728,23 @@ private void configureFormResolvers(final RuntimeWiring.Builder builder) {
corpUserType,
(env) -> {
final FormActorAssignment actors = env.getSource();
return actors.getUsers().stream()
.map(CorpUser::getUrn)
.collect(Collectors.toList());
return actors.getUsers() != null
? actors.getUsers().stream()
.map(CorpUser::getUrn)
.collect(Collectors.toList())
: null;
}))
.dataFetcher(
"groups",
new LoadableTypeBatchResolver<>(
corpGroupType,
(env) -> {
final FormActorAssignment actors = env.getSource();
return actors.getGroups().stream()
.map(CorpGroup::getUrn)
.collect(Collectors.toList());
return actors.getGroups() != null
? actors.getGroups().stream()
.map(CorpGroup::getUrn)
.collect(Collectors.toList())
: null;
}))
.dataFetcher("isAssignedToMe", new IsFormAssignedToMeResolver(groupService)));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ public static FormActorAssignment mapFormActorAssignment(
if (input.getGroups() != null) {
UrnArray groupUrns = new UrnArray();
input.getGroups().forEach(group -> groupUrns.add(UrnUtils.getUrn(group)));
result.setUsers(groupUrns);
result.setGroups(groupUrns);
}

return result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ public static boolean isOwnerEqual(
if (!owner.getOwner().equals(ownerUrn)) {
return false;
}
if (owner.getTypeUrn() != null) {
if (owner.getTypeUrn() != null && ownershipTypeUrn != null) {
return owner.getTypeUrn().equals(ownershipTypeUrn);
}
if (ownershipTypeUrn == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ public CompletableFuture<DocPropagationSettings> get(final DataFetchingEnvironme
final GlobalSettingsInfo globalSettings =
_settingsService.getGlobalSettings(context.getOperationContext());
final DocPropagationSettings defaultSettings = new DocPropagationSettings();
defaultSettings.setDocColumnPropagation(true);
// TODO: Enable by default. Currently the automation trusts the settings aspect, which
// does not have this.
defaultSettings.setDocColumnPropagation(false);
return globalSettings != null && globalSettings.hasDocPropagation()
? mapDocPropagationSettings(globalSettings.getDocPropagation())
: defaultSettings;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public void testIsOwnerEqualOnlyOwnershipTypeUrn() throws URISyntaxException {
Urn technicalOwnershipTypeUrn = new Urn(TECHNICAL_OWNER_OWNERSHIP_TYPE_URN);
Urn businessOwnershipTypeUrn = new Urn(BUSINESS_OWNER_OWNERSHIP_TYPE_URN);
Urn ownerUrn1 = new Urn("urn:li:corpuser:foo");
Urn ownerUrn2 = new Urn("urn:li:corpuser:bar");

Owner ownerWithTechnicalOwnership = new Owner();
ownerWithTechnicalOwnership.setOwner(ownerUrn1);
Expand All @@ -72,12 +73,17 @@ public void testIsOwnerEqualOnlyOwnershipTypeUrn() throws URISyntaxException {
ownerWithoutOwnershipType.setOwner(ownerUrn1);
ownerWithoutOwnershipType.setType(OwnershipType.NONE);

Owner owner2WithoutOwnershipType = new Owner();
owner2WithoutOwnershipType.setOwner(ownerUrn2);
owner2WithoutOwnershipType.setType(OwnershipType.NONE);

assertTrue(
OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, technicalOwnershipTypeUrn));
assertFalse(
OwnerUtils.isOwnerEqual(ownerWithBusinessOwnership, ownerUrn1, technicalOwnershipTypeUrn));
assertFalse(OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, null));
assertTrue(OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, null));
assertTrue(OwnerUtils.isOwnerEqual(ownerWithoutOwnershipType, ownerUrn1, null));
assertFalse(OwnerUtils.isOwnerEqual(owner2WithoutOwnershipType, ownerUrn1, null));
}

public void testIsOwnerEqualWithBothLegacyAndNewType() throws URISyntaxException {
Expand Down
2 changes: 1 addition & 1 deletion datahub-upgrade/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ dependencies {
// mock internal schema registry
implementation externalDependency.kafkaAvroSerde
implementation externalDependency.kafkaAvroSerializer
implementation "org.apache.kafka:kafka_2.12:3.7.0"
implementation "org.apache.kafka:kafka_2.12:3.7.1"

implementation externalDependency.slf4jApi
compileOnly externalDependency.lombok
Expand Down
2 changes: 1 addition & 1 deletion datahub-web-react/.eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ module.exports = {
],
'vitest/prefer-to-be': 'off',
'@typescript-eslint/no-use-before-define': ['error', { functions: false, classes: false }],
'react-refresh/only-export-components': ['warn', { 'allowConstantExport': true }],
'react-refresh/only-export-components': ['warn', { allowConstantExport: true }],
},
settings: {
react: {
Expand Down
Loading

0 comments on commit c80d9e5

Please sign in to comment.