Skip to content

Commit

Permalink
Merge branch 'master' into feat-configurable-bootstrap-policies
Browse files Browse the repository at this point in the history
  • Loading branch information
sgomezvillamor authored Oct 4, 2023
2 parents af3ca90 + 13508a9 commit 4ece833
Show file tree
Hide file tree
Showing 115 changed files with 12,972 additions and 1,369 deletions.
25 changes: 15 additions & 10 deletions .github/workflows/airflow-plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,21 @@ jobs:
strategy:
matrix:
include:
- python-version: "3.7"
extraPythonRequirement: "apache-airflow~=2.1.0"
- python-version: "3.7"
extraPythonRequirement: "apache-airflow~=2.2.0"
- python-version: "3.8"
extra_pip_requirements: "apache-airflow~=2.1.4"
extra_pip_extras: plugin-v1
- python-version: "3.8"
extra_pip_requirements: "apache-airflow~=2.2.4"
extra_pip_extras: plugin-v1
- python-version: "3.10"
extraPythonRequirement: "apache-airflow~=2.4.0"
extra_pip_requirements: "apache-airflow~=2.4.0"
extra_pip_extras: plugin-v2
- python-version: "3.10"
extraPythonRequirement: "apache-airflow~=2.6.0"
extra_pip_requirements: "apache-airflow~=2.6.0"
extra_pip_extras: plugin-v2
- python-version: "3.10"
extraPythonRequirement: "apache-airflow>2.6.0"
extra_pip_requirements: "apache-airflow>=2.7.0"
extra_pip_extras: plugin-v2
fail-fast: false
steps:
- uses: actions/checkout@v3
Expand All @@ -51,13 +56,13 @@ jobs:
cache: "pip"
- name: Install dependencies
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Install airflow package and test (extras ${{ matrix.extraPythonRequirement }})
run: ./gradlew -Pextra_pip_requirements='${{ matrix.extraPythonRequirement }}' :metadata-ingestion-modules:airflow-plugin:lint :metadata-ingestion-modules:airflow-plugin:testQuick
- name: Install airflow package and test (extras ${{ matrix.extra_pip_requirements }})
run: ./gradlew -Pextra_pip_requirements='${{ matrix.extra_pip_requirements }}' -Pextra_pip_extras='${{ matrix.extra_pip_extras }}' :metadata-ingestion-modules:airflow-plugin:lint :metadata-ingestion-modules:airflow-plugin:testQuick
- name: pip freeze show list installed
if: always()
run: source metadata-ingestion-modules/airflow-plugin/venv/bin/activate && pip freeze
- uses: actions/upload-artifact@v3
if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'apache-airflow>2.6.0' }}
if: ${{ always() && matrix.python-version == '3.10' && matrix.extra_pip_requirements == 'apache-airflow>=2.7.0' }}
with:
name: Test Results (Airflow Plugin ${{ matrix.python-version}})
path: |
Expand Down
9 changes: 5 additions & 4 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ jobs:
matrix:
command:
[
"./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :datahub-frontend:build -x :datahub-web-react:build --parallel",
# metadata-ingestion and airflow-plugin each have dedicated build jobs
"./gradlew build -x :metadata-ingestion:build -x :metadata-ingestion:check -x docs-website:build -x :metadata-integration:java:spark-lineage:test -x :metadata-io:test -x :metadata-ingestion-modules:airflow-plugin:build -x :metadata-ingestion-modules:airflow-plugin:check -x :datahub-frontend:build -x :datahub-web-react:build --parallel",
"./gradlew :datahub-frontend:build :datahub-web-react:build --parallel",
"./gradlew :metadata-ingestion-modules:airflow-plugin:build --parallel"
]
timezone:
[
Expand All @@ -51,7 +51,8 @@ jobs:
java-version: 11
- uses: actions/setup-python@v4
with:
python-version: "3.7"
python-version: "3.10"
cache: pip
- name: Gradle build (and test)
run: |
${{ matrix.command }}
Expand Down Expand Up @@ -81,7 +82,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: "3.7"
python-version: "3.10"
- name: Download YQ
uses: chrisdickinson/[email protected]
with:
Expand Down
13 changes: 9 additions & 4 deletions .github/workflows/metadata-ingestion.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ jobs:
[
"lint",
"testQuick",
"testIntegration",
"testIntegrationBatch0",
"testIntegrationBatch1",
"testSlowIntegration",
"testIntegrationBatch2",
]
include:
- python-version: "3.7"
Expand All @@ -56,9 +56,14 @@ jobs:
run: ./gradlew :metadata-ingestion:installPackageOnly
- name: Run metadata-ingestion tests
run: ./gradlew :metadata-ingestion:${{ matrix.command }}
- name: pip freeze show list installed
- name: Debug info
if: always()
run: source metadata-ingestion/venv/bin/activate && pip freeze
run: |
source metadata-ingestion/venv/bin/activate && pip freeze
set -x
df -hl
docker image ls
docker system df
- uses: actions/upload-artifact@v3
if: ${{ always() && matrix.command != 'lint' }}
with:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ export default function EmbeddedProfile<T>({ urn, entityType, getOverridePropert
return <NonExistentEntityPage />;
}

const readOnly = false;

return (
<EntityContext.Provider
value={{
Expand All @@ -80,15 +82,15 @@ export default function EmbeddedProfile<T>({ urn, entityType, getOverridePropert
<StyledDivider />
<UpstreamHealth />
<StyledDivider />
<SidebarAboutSection readOnly />
<SidebarAboutSection readOnly={readOnly} />
<StyledDivider />
<SidebarOwnerSection readOnly />
<SidebarOwnerSection readOnly={readOnly} />
<StyledDivider />
<SidebarTagsSection readOnly properties={{ hasTags: true, hasTerms: true }} />
<SidebarTagsSection readOnly={readOnly} properties={{ hasTags: true, hasTerms: true }} />
<StyledDivider />
<SidebarDomainSection readOnly />
<SidebarDomainSection readOnly={readOnly} />
<StyledDivider />
<DataProductSection readOnly />
<DataProductSection readOnly={readOnly} />
</>
)}
</EntityContext.Provider>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ describe('encodeSchemaField', () => {
});

describe('getPopulatedColumnsByUrn', () => {
it('should update columns by urn with data job fine grained data so that the data job appears to have the upstream columns', () => {
it('should update columns by urn with data job fine grained data so that the data job appears to have the upstream and downstream columns', () => {
const dataJobWithCLL = {
...dataJob1,
name: '',
Expand Down Expand Up @@ -116,12 +116,24 @@ describe('getPopulatedColumnsByUrn', () => {
recursive: false,
type: SchemaFieldDataType.String,
},
{
fieldPath: 'test2',
nullable: false,
recursive: false,
type: SchemaFieldDataType.String,
},
{
fieldPath: 'test3',
nullable: false,
recursive: false,
type: SchemaFieldDataType.String,
},
{
fieldPath: 'test4',
nullable: false,
recursive: false,
type: SchemaFieldDataType.String,
},
],
});
});
Expand Down
16 changes: 13 additions & 3 deletions datahub-web-react/src/app/lineage/utils/columnLineageUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ export function getPopulatedColumnsByUrn(
),
};
} else if (fetchedEntity.type === EntityType.DataJob && fetchedEntity.fineGrainedLineages) {
// Add upstream fields from fineGrainedLineage onto DataJob to mimic upstream dataset fields.
// DataJobs will virtually "have" these fields so we can draw full column paths
// from upstream dataset fields to downstream dataset fields.
// Add upstream and downstream fields from fineGrainedLineage onto DataJob to mimic upstream
// and downstream dataset fields. DataJobs will virtually "have" these fields so we can draw
// full column paths from upstream dataset fields to downstream dataset fields.
const fields: SchemaField[] = [];
fetchedEntity.fineGrainedLineages.forEach((fineGrainedLineage) => {
fineGrainedLineage.upstreams?.forEach((upstream) => {
Expand All @@ -103,6 +103,16 @@ export function getPopulatedColumnsByUrn(
});
}
});
fineGrainedLineage.downstreams?.forEach((downstream) => {
if (!fields.some((field) => field.fieldPath === downstream.path)) {
fields.push({
fieldPath: downgradeV2FieldPath(downstream.path) || '',
nullable: false,
recursive: false,
type: SchemaFieldDataType.String,
});
}
});
});
populatedColumnsByUrn = { ...populatedColumnsByUrn, [urn]: fields };
}
Expand Down
12 changes: 12 additions & 0 deletions datahub-web-react/src/app/lineage/utils/extendAsyncEntities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,18 @@ export function extendColumnLineage(
});
});
});
if (lineageVizConfig.type === EntityType.DataJob && !fineGrainedLineage.upstreams?.length) {
fineGrainedLineage.downstreams?.forEach((downstream) => {
const [downstreamEntityUrn, downstreamField] = breakFieldUrn(downstream);
updateFineGrainedMap(
fineGrainedMap,
lineageVizConfig.urn,
downstreamField,
downstreamEntityUrn,
downstreamField,
);
});
}
});
}

Expand Down
2 changes: 1 addition & 1 deletion docker/airflow/local_airflow.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
:::caution

This feature is currently unmaintained. As of 0.10.0 the container described is not published alongside the DataHub CLI. If you'd like to use it, please reach out to us on the [community slack.](docs/slack.md)
This guide is currently unmaintained. As of 0.10.0 the container described is not published alongside the DataHub CLI. If you'd like to use it, please reach out to us on the [community Slack](docs/slack.md).

:::

Expand Down
2 changes: 1 addition & 1 deletion docs-website/generateDocsDir.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ function list_markdown_files(): string[] {
.trim()
.split("\n");
let all_generated_markdown_files = execSync(
"cd .. && ls docs/generated/**/**/*.md"
"cd .. && ls docs/generated/**/**/*.md && ls docs/generated/**/*.md"
)
.toString()
.trim()
Expand Down
5 changes: 4 additions & 1 deletion docs-website/sidebars.js
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ module.exports = {
"docs/features/dataset-usage-and-query-history",
"docs/posts",
"docs/sync-status",
"docs/lineage/lineage-feature-guide",
"docs/generated/lineage/lineage-feature-guide",
{
type: "doc",
id: "docs/tests/metadata-tests",
Expand All @@ -446,6 +446,9 @@ module.exports = {
"docs/managed-datahub/observe/custom-sql-assertions",
],
},
{
Guides: ["docs/features/feature-guides/ui-lineage"],
},
],
},
{
Expand Down
2 changes: 1 addition & 1 deletion docs/act-on-metadata/impact-analysis.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,4 @@ We currently limit the list of dependencies to 10,000 records; we suggest applyi

### Related Features

* [DataHub Lineage](../lineage/lineage-feature-guide.md)
* [DataHub Lineage](../generated/lineage/lineage-feature-guide.md)
3 changes: 2 additions & 1 deletion docs/api/tutorials/lineage.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ import TabItem from '@theme/TabItem';
## Why Would You Use Lineage?

Lineage is used to capture data dependencies within an organization. It allows you to track the inputs from which a data asset is derived, along with the data assets that depend on it downstream.
For more information about lineage, refer to [About DataHub Lineage](/docs/lineage/lineage-feature-guide.md).

For more information about lineage, refer to [About DataHub Lineage](/docs/generated/lineage/lineage-feature-guide.md).

### Goal Of This Guide

Expand Down
58 changes: 58 additions & 0 deletions docs/features/feature-guides/ui-lineage.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Managing Lineage via UI

## Viewing lineage
The UI shows the latest version of the lineage. The time picker filters the edges within that latest version, excluding those that were last updated outside of the selected time window. Selecting time windows in the past will not show you historical lineages. It will only filter the view of the latest version of the lineage.

## Editing from Lineage Graph View

The first place that you can edit lineage for entities is from the Lineage Visualization screen. Click on the "Lineage" button on the top right of an entity's profile to get to this view.

<p align="center">
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/lineage-viz-button.png"/>
</p>

Once you find the entity that you want to edit the lineage of, click on the three-dot menu dropdown to select whether you want to edit lineage in the upstream direction or the downstream direction.

<p align="center">
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/edit-lineage-menu.png"/>
</p>

If you want to edit upstream lineage for entities downstream of the center node or downstream lineage for entities upstream of the center node, you can simply re-center to focus on the node you want to edit. Once focused on the desired node, you can edit lineage in either direction.

<p align="center">
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/focus-to-edit.png"/>
</p>

### Adding Lineage Edges

Once you click "Edit Upstream" or "Edit Downstream," a modal will open that allows you to manage lineage for the selected entity in the chosen direction. In order to add a lineage edge to a new entity, search for it by name in the provided search bar and select it. Once you're satisfied with everything you've added, click "Save Changes." If you change your mind, you can always cancel or exit without saving the changes you've made.

<p align="center">
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/add-upstream.png"/>
</p>

### Removing Lineage Edges

You can remove lineage edges from the same modal used to add lineage edges. Find the edge(s) that you want to remove, and click the "X" to the right of each one. As with adding edges, you need to click "Save Changes" for the removal to take effect; if you exit without saving, your changes won't be applied.

<p align="center">
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/remove-lineage-edge.png"/>
</p>

### Reviewing Changes

Any time lineage is edited manually, we keep track of who made the change and when they made it. You can see this information in the modal where you add and remove edges. If an edge was added manually, a user avatar will be in line with the edge that was added. You can hover over this avatar in order to see who added it and when.

<p align="center">
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/lineage-edge-audit-stamp.png"/>
</p>

## Editing from Lineage Tab

The other place that you can edit lineage for entities is from the Lineage Tab on an entity's profile. Click on the "Lineage" tab in an entity's profile and then find the "Edit" dropdown that allows you to edit upstream or downstream lineage for the given entity.

<p align="center">
<img width="70%" src="https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/lineage/edit-from-lineage-tab.png"/>
</p>

Using the modal from this view will work the same as described above for editing from the Lineage Visualization screen.
2 changes: 1 addition & 1 deletion docs/how/add-custom-data-platform.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ datahub put platform --name MyCustomDataPlatform --display_name "My Custom Data
source:
type: "file"
config:
filename: "./my-custom-data-platform.json"
path: "./my-custom-data-platform.json"
# see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation
sink:
Expand Down
2 changes: 1 addition & 1 deletion docs/how/add-user-data.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ Define an [ingestion recipe](https://datahubproject.io/docs/metadata-ingestion/#
source:
type: "file"
config:
filename: "./my-user.json"
path: "./my-user.json"
# see https://datahubproject.io/docs/metadata-ingestion/sink_docs/datahub for complete documentation
sink:
Expand Down
3 changes: 3 additions & 0 deletions docs/how/updating-datahub.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
## Next

### Breaking Changes

- #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now.
- #8853 - The Airflow plugin no longer supports Airflow 2.0.x or Python 3.7. See the docs for more details.
- #8853 - Introduced the Airflow plugin v2. If you're using Airflow 2.3+, the v2 plugin will be enabled by default, so you'll need to update your requirements to include the `plugin-v2` extra, e.g. `pip install 'acryl-datahub-airflow-plugin[plugin-v2]'`. To continue using the v1 plugin, set the `DATAHUB_AIRFLOW_PLUGIN_USE_V1_PLUGIN` environment variable to `true`.

### Potential Downtime

Expand Down
Loading

0 comments on commit 4ece833

Please sign in to comment.