From 75b36c41ee4fd74891b1bfe37885b4cd840e2906 Mon Sep 17 00:00:00 2001
From: Ellie O'Neil <110510035+eboneil@users.noreply.github.com>
Date: Thu, 19 Oct 2023 08:32:24 -0700
Subject: [PATCH 01/11] docs(protobuf) Update messaging around nesting messages
(#9048)
---
metadata-integration/java/datahub-protobuf/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/metadata-integration/java/datahub-protobuf/README.md b/metadata-integration/java/datahub-protobuf/README.md
index daea8d438679c..29b82aa3e68f5 100644
--- a/metadata-integration/java/datahub-protobuf/README.md
+++ b/metadata-integration/java/datahub-protobuf/README.md
@@ -1,6 +1,6 @@
# Protobuf Schemas
-The `datahub-protobuf` module is designed to be used with the Java Emitter, the input is a compiled protobuf binary `*.protoc` files and optionally the corresponding `*.proto` source code. In addition, you can supply the root message in cases where a single protobuf source file includes multiple non-nested messages.
+The `datahub-protobuf` module is designed to be used with the Java Emitter. The input is a compiled protobuf binary (`*.protoc` file) and, optionally, the corresponding `*.proto` source code. You can supply a file with multiple nested messages to be processed. If a file contains multiple non-nested messages, you must either split them into separate files or supply the root message; otherwise, only the first message will be processed.
## Supported Features
From b1abd38a6b4aef3da0c50ecd23612cae7e3c5d28 Mon Sep 17 00:00:00 2001
From: Kos Korchak <97058061+kkorchak@users.noreply.github.com>
Date: Thu, 19 Oct 2023 15:33:54 -0400
Subject: [PATCH 02/11] refactor(): Use data-testids for glossary_navigation
and dataset_ownership tests (#9033)
---
.../CreateGlossaryEntityModal.tsx | 7 ++-
.../shared/EntityDropdown/EntityDropdown.tsx | 3 +-
.../MoveGlossaryEntityModal.tsx | 5 +-
.../Ownership/sidebar/SidebarOwnerSection.tsx | 6 +-
.../src/app/glossary/BusinessGlossaryPage.tsx | 4 +-
.../src/app/glossary/GlossarySidebar.tsx | 2 +-
.../e2e/glossary/glossary_navigation.js | 55 +++++++++++--------
.../cypress/e2e/lineage/lineage_graph.js | 2 -
.../e2e/mutations/dataset_ownership.js | 2 +-
.../tests/cypress/cypress/support/commands.js | 1 +
10 files changed, 54 insertions(+), 33 deletions(-)
diff --git a/datahub-web-react/src/app/entity/shared/EntityDropdown/CreateGlossaryEntityModal.tsx b/datahub-web-react/src/app/entity/shared/EntityDropdown/CreateGlossaryEntityModal.tsx
index d48ead2f5863e..9788d36af2c65 100644
--- a/datahub-web-react/src/app/entity/shared/EntityDropdown/CreateGlossaryEntityModal.tsx
+++ b/datahub-web-react/src/app/entity/shared/EntityDropdown/CreateGlossaryEntityModal.tsx
@@ -112,7 +112,11 @@ function CreateGlossaryEntityModal(props: Props) {
-
- Move
+
+ Move
+
>
}
>
diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection.tsx
index 57743d0531afe..aa9a337d4ba44 100644
--- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection.tsx
+++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection.tsx
@@ -70,7 +70,11 @@ export const SidebarOwnerSection = ({ properties, readOnly }: Props) => {
)}
{!readOnly && (
- setShowAddModal(true)}>
+ setShowAddModal(true)}
+ data-testid="add-owners-button"
+ >
Add Owners
)}
diff --git a/datahub-web-react/src/app/glossary/BusinessGlossaryPage.tsx b/datahub-web-react/src/app/glossary/BusinessGlossaryPage.tsx
index 11f54cb5078e6..a5262265fd23d 100644
--- a/datahub-web-react/src/app/glossary/BusinessGlossaryPage.tsx
+++ b/datahub-web-react/src/app/glossary/BusinessGlossaryPage.tsx
@@ -92,11 +92,12 @@ function BusinessGlossaryPage() {
{(termsError || nodesError) && (
)}
-
+ Business Glossary
0)}
onClick={() => onClickCreate(false)}
>
diff --git a/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx b/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx
index 4ddeb7b492595..bee9b04cee100 100644
--- a/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx
+++ b/datahub-web-react/src/app/ingest/source/builder/RecipeBuilder.tsx
@@ -86,10 +86,20 @@ function RecipeBuilder(props: Props) {
{sourceConfigs?.displayName} Recipe
- switchViews(true)}>
+ switchViews(true)}
+ data-testid="recipe-builder-form-button"
+ >
Form
- switchViews(false)}>
+ switchViews(false)}
+ data-testid="recipe-builder-yaml-button"
+ >
YAML
@@ -114,7 +124,9 @@ function RecipeBuilder(props: Props) {
Previous
- Next
+
+ Next
+
>
)}
diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js b/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js
new file mode 100644
index 0000000000000..6c5dd77810644
--- /dev/null
+++ b/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js
@@ -0,0 +1,68 @@
+
+const number = Math.floor(Math.random() * 100000);
+const accound_id = `account${number}`;
+const warehouse_id = `warehouse${number}`;
+const username = `user${number}`;
+const password = `password${number}`;
+const role = `role${number}`;
+const ingestion_source_name = `ingestion source ${number}`;
+
+describe("ingestion source creation flow", () => {
+ it("create a ingestion source using ui, verify ingestion source details saved correctly, remove ingestion source", () => {
+ // Go to the ingestion page and create a Snowflake source
+ cy.loginWithCredentials();
+ cy.goToIngestionPage();
+ cy.clickOptionWithTestId("create-ingestion-source-button");
+ cy.clickOptionWithText("Snowflake");
+ cy.waitTextVisible("Snowflake Recipe");
+ cy.get("#account_id").type(accound_id);
+ cy.get("#warehouse").type(warehouse_id);
+ cy.get("#username").type(username);
+ cy.get("#password").type(password);
+ cy.focused().blur();
+ cy.get("#role").type(role);
+
+ // Verify yaml recipe is generated correctly
+ cy.clickOptionWithTestId("recipe-builder-yaml-button");
+ cy.waitTextVisible("account_id");
+ cy.waitTextVisible(accound_id);
+ cy.waitTextVisible(warehouse_id);
+ cy.waitTextVisible(username);
+ cy.waitTextVisible(password);
+ cy.waitTextVisible(role);
+
+ // Finish creating source
+ cy.clickOptionWithTestId("recipe-builder-next-button");
+ cy.waitTextVisible("Configure an Ingestion Schedule");
+ cy.clickOptionWithTestId("ingestion-schedule-next-button");
+ cy.waitTextVisible("Give this ingestion source a name.");
+ cy.get('[data-testid="source-name-input"]').type(ingestion_source_name);
+ cy.clickOptionWithTestId("ingestion-source-save-button");
+ cy.waitTextVisible("Successfully created ingestion source!").wait(5000)
+ cy.waitTextVisible(ingestion_source_name);
+ cy.get('[data-testid="ingestion-source-table-status"]').contains("Pending...").should("be.visible");
+
+ // Verify ingestion source details are saved correctly
+ cy.get('[data-testid="ingestion-source-table-edit-button"]').first().click();
+ cy.waitTextVisible("Edit Ingestion Source");
+ cy.get("#account_id").should("have.value", accound_id);
+ cy.get("#warehouse").should("have.value", warehouse_id);
+ cy.get("#username").should("have.value", username);
+ cy.get("#password").should("have.value", password);
+ cy.get("#role").should("have.value", role);
+ cy.get("button").contains("Next").click();
+ cy.waitTextVisible("Configure an Ingestion Schedule");
+ cy.clickOptionWithTestId("ingestion-schedule-next-button");
+ cy.get('[data-testid="source-name-input"]').clear().type(ingestion_source_name + " EDITED");
+ cy.clickOptionWithTestId("ingestion-source-save-button");
+ cy.waitTextVisible("Successfully updated ingestion source!");
+ cy.waitTextVisible(ingestion_source_name + " EDITED");
+
+ // Remove ingestion source
+ cy.get('[data-testid="delete-button"]').first().click();
+ cy.waitTextVisible("Confirm Ingestion Source Removal");
+ cy.get("button").contains("Yes").click();
+ cy.waitTextVisible("Removed ingestion source.");
+ cy.ensureTextNotPresent(ingestion_source_name + " EDITED")
+ })
+});
\ No newline at end of file
From 2fea466d48c856f5c469af6f611990a200e5bece Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Fri, 20 Oct 2023 13:47:52 -0700
Subject: [PATCH 05/11] docs: fix lineage capability annotations (#8954)
---
.../src/datahub/ingestion/source/aws/glue.py | 1 +
.../datahub/ingestion/source/bigquery_v2/bigquery.py | 1 +
.../src/datahub/ingestion/source/kafka_connect.py | 1 +
.../datahub/ingestion/source/looker/looker_source.py | 6 +++++-
.../src/datahub/ingestion/source/metabase.py | 1 +
.../src/datahub/ingestion/source/metadata/lineage.py | 10 +++++++++-
.../src/datahub/ingestion/source/mode.py | 1 +
.../src/datahub/ingestion/source/nifi.py | 4 +++-
.../src/datahub/ingestion/source/powerbi/powerbi.py | 5 ++++-
.../src/datahub/ingestion/source/sql_queries.py | 10 +++++++++-
.../src/datahub/ingestion/source/superset.py | 1 +
.../src/datahub/ingestion/source/tableau.py | 6 +++++-
12 files changed, 41 insertions(+), 6 deletions(-)
diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py
index e5dff786b71d1..aa7e5aa352a3e 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py
@@ -221,6 +221,7 @@ def report_table_dropped(self, table: str) -> None:
SourceCapability.DELETION_DETECTION,
"Enabled by default when stateful ingestion is turned on.",
)
+@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default")
class GlueSource(StatefulIngestionSourceBase):
"""
Note: if you also have files in S3 that you'd like to ingest, we recommend you use Glue's built-in data catalog. See [here](../../../../docs/generated/ingestion/sources/s3.md) for a quick guide on how to set up a crawler on Glue and ingest the outputs with DataHub.
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py
index 552612f877b9a..692d8c4f81bb6 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py
@@ -153,6 +153,7 @@ def cleanup(config: BigQueryV2Config) -> None:
)
@capability(SourceCapability.DESCRIPTIONS, "Enabled by default")
@capability(SourceCapability.LINEAGE_COARSE, "Optionally enabled via configuration")
+@capability(SourceCapability.LINEAGE_FINE, "Optionally enabled via configuration")
@capability(
SourceCapability.USAGE_STATS,
"Enabled by default, can be disabled via configuration `include_usage_statistics`",
diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py
index 5fae0ee5215a3..1a1e012e80633 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py
@@ -1096,6 +1096,7 @@ def transform_connector_config(
@config_class(KafkaConnectSourceConfig)
@support_status(SupportStatus.CERTIFIED)
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
+@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default")
class KafkaConnectSource(StatefulIngestionSourceBase):
config: KafkaConnectSourceConfig
report: KafkaConnectSourceReport
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
index 8297a0aa8efa7..a3df977582ca4 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
@@ -103,6 +103,11 @@
@capability(
SourceCapability.OWNERSHIP, "Enabled by default, configured using `extract_owners`"
)
+@capability(SourceCapability.LINEAGE_COARSE, "Supported by default")
+@capability(
+ SourceCapability.LINEAGE_FINE,
+ "Enabled by default, configured using `extract_column_level_lineage`",
+)
@capability(
SourceCapability.USAGE_STATS,
"Enabled by default, configured using `extract_usage_history`",
@@ -1128,7 +1133,6 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
def emit_independent_looks_mcp(
self, dashboard_element: LookerDashboardElement
) -> Iterable[MetadataWorkUnit]:
-
yield from auto_workunit(
stream=self._make_chart_metadata_events(
dashboard_element=dashboard_element,
diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py
index fb4512893feb1..24145d60210ff 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py
@@ -80,6 +80,7 @@ def remove_trailing_slash(cls, v):
@config_class(MetabaseConfig)
@support_status(SupportStatus.CERTIFIED)
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
+@capability(SourceCapability.LINEAGE_COARSE, "Supported by default")
class MetabaseSource(Source):
"""
This plugin extracts Charts, dashboards, and associated metadata. This plugin is in beta and has only been tested
diff --git a/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py
index 1c0c809c16a60..f33c6e0edae3d 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/metadata/lineage.py
@@ -23,11 +23,17 @@
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.api.decorators import (
SupportStatus,
+ capability,
config_class,
platform_name,
support_status,
)
-from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport
+from datahub.ingestion.api.source import (
+ MetadataWorkUnitProcessor,
+ Source,
+ SourceCapability,
+ SourceReport,
+)
from datahub.ingestion.api.source_helpers import (
auto_status_aspect,
auto_workunit_reporter,
@@ -121,6 +127,8 @@ def version_must_be_1(cls, v):
@platform_name("File Based Lineage")
@config_class(LineageFileSourceConfig)
@support_status(SupportStatus.CERTIFIED)
+@capability(SourceCapability.LINEAGE_COARSE, "Specified in the lineage file.")
+@capability(SourceCapability.LINEAGE_FINE, "Specified in the lineage file.")
@dataclass
class LineageFileSource(Source):
"""
diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py
index a000c66a406c2..c46b56da422d9 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/mode.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py
@@ -98,6 +98,7 @@ class HTTPError429(HTTPError):
@config_class(ModeConfig)
@support_status(SupportStatus.CERTIFIED)
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
+@capability(SourceCapability.LINEAGE_COARSE, "Supported by default")
class ModeSource(Source):
"""
diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py
index ac1e03812db3b..bc05edbb3c623 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py
@@ -26,11 +26,12 @@
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.api.decorators import (
SupportStatus,
+ capability,
config_class,
platform_name,
support_status,
)
-from datahub.ingestion.api.source import Source, SourceReport
+from datahub.ingestion.api.source import Source, SourceCapability, SourceReport
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.metadata.schema_classes import (
DataFlowInfoClass,
@@ -360,6 +361,7 @@ def report_dropped(self, ent_name: str) -> None:
@platform_name("NiFi", id="nifi")
@config_class(NifiSourceConfig)
@support_status(SupportStatus.CERTIFIED)
+@capability(SourceCapability.LINEAGE_COARSE, "Supported. See docs for limitations")
class NifiSource(Source):
"""
This plugin extracts the following:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
index 52bcef66658c8..4611a8eed4782 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py
@@ -264,7 +264,6 @@ def extract_lineage(
)
if len(upstream) > 0:
-
upstream_lineage_class: UpstreamLineageClass = UpstreamLineageClass(
upstreams=upstream,
fineGrainedLineages=cll_lineage or None,
@@ -1139,6 +1138,10 @@ def report_to_datahub_work_units(
SourceCapability.OWNERSHIP,
"Disabled by default, configured using `extract_ownership`",
)
+@capability(
+ SourceCapability.LINEAGE_COARSE,
+ "Enabled by default, configured using `extract_lineage`.",
+)
@capability(
SourceCapability.LINEAGE_FINE,
"Disabled by default, configured using `extract_column_level_lineage`. ",
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py
index bce4d1ec76e6e..fcf97e461967c 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py
@@ -20,11 +20,17 @@
from datahub.ingestion.api.common import PipelineContext
from datahub.ingestion.api.decorators import (
SupportStatus,
+ capability,
config_class,
platform_name,
support_status,
)
-from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport
+from datahub.ingestion.api.source import (
+ MetadataWorkUnitProcessor,
+ Source,
+ SourceCapability,
+ SourceReport,
+)
from datahub.ingestion.api.source_helpers import auto_workunit_reporter
from datahub.ingestion.api.workunit import MetadataWorkUnit
from datahub.ingestion.graph.client import DataHubGraph
@@ -83,6 +89,8 @@ def compute_stats(self) -> None:
@platform_name("SQL Queries")
@config_class(SqlQueriesSourceConfig)
@support_status(SupportStatus.TESTING)
+@capability(SourceCapability.LINEAGE_COARSE, "Parsed from SQL queries")
+@capability(SourceCapability.LINEAGE_FINE, "Parsed from SQL queries")
class SqlQueriesSource(Source):
# TODO: Documentation
urns: Optional[Set[str]]
diff --git a/metadata-ingestion/src/datahub/ingestion/source/superset.py b/metadata-ingestion/src/datahub/ingestion/source/superset.py
index 14bc4242d2a91..e491a1e8b82fa 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/superset.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/superset.py
@@ -142,6 +142,7 @@ def get_filter_name(filter_obj):
@capability(
SourceCapability.DELETION_DETECTION, "Optionally enabled via stateful_ingestion"
)
+@capability(SourceCapability.LINEAGE_COARSE, "Supported by default")
class SupersetSource(StatefulIngestionSourceBase):
"""
This plugin extracts the following:
diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py
index bad7ae49d325e..4bc40b0aac964 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py
@@ -452,6 +452,10 @@ class TableauSourceReport(StaleEntityRemovalSourceReport):
@capability(SourceCapability.OWNERSHIP, "Requires recipe configuration")
@capability(SourceCapability.TAGS, "Requires recipe configuration")
@capability(SourceCapability.LINEAGE_COARSE, "Enabled by default")
+@capability(
+ SourceCapability.LINEAGE_FINE,
+ "Enabled by default, configure using `extract_column_level_lineage`",
+)
class TableauSource(StatefulIngestionSourceBase):
platform = "tableau"
@@ -533,7 +537,7 @@ def fetch_projects():
path=[],
)
# Set parent project name
- for project_id, project in all_project_map.items():
+ for _project_id, project in all_project_map.items():
if (
project.parent_id is not None
and project.parent_id in all_project_map
From 4d35a254cabb3a6241af8857c7d63298783ebaa7 Mon Sep 17 00:00:00 2001
From: Kos Korchak <97058061+kkorchak@users.noreply.github.com>
Date: Fri, 20 Oct 2023 17:09:14 -0400
Subject: [PATCH 06/11] =?UTF-8?q?Added=20more=20data-testid=20usage=20for?=
=?UTF-8?q?=20edit=5Fdocumentation=20and=20managing=5Fsecr=E2=80=A6=20(#90?=
=?UTF-8?q?60)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../components/legacy/DescriptionModal.tsx | 6 ++-
.../shared/components/styled/AddLinkModal.tsx | 6 ++-
.../tabs/Documentation/DocumentationTab.tsx | 1 +
.../components/DescriptionEditorToolbar.tsx | 2 +-
.../app/ingest/secret/SecretBuilderModal.tsx | 4 ++
.../src/app/ingest/secret/SecretsList.tsx | 6 ++-
.../e2e/glossary/glossary_navigation.js | 3 +-
.../e2e/mutations/edit_documentation.js | 42 +++++++--------
.../cypress/e2e/mutations/managing_secrets.js | 51 ++++++++++---------
9 files changed, 70 insertions(+), 51 deletions(-)
diff --git a/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx b/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx
index 579b8c9905da0..cb37c44a36caa 100644
--- a/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx
+++ b/datahub-web-react/src/app/entity/shared/components/legacy/DescriptionModal.tsx
@@ -41,7 +41,11 @@ export default function UpdateDescriptionModal({ title, description, original, o
footer={
<>
Cancel
- onSubmit(updatedDesc)} disabled={updatedDesc === description}>
+ onSubmit(updatedDesc)}
+ disabled={updatedDesc === description}
+ data-testid="description-modal-update-button"
+ >
Update
>
diff --git a/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx b/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx
index 34d4f0cb3fe91..68a8cf4094362 100644
--- a/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx
+++ b/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx
@@ -57,7 +57,7 @@ export const AddLinkModal = ({ buttonProps, refetch }: AddLinkProps) => {
return (
<>
- } onClick={showModal} {...buttonProps}>
+ } onClick={showModal} {...buttonProps}>
Add Link
{
Cancel
,
-
+
Add
,
]}
>
{
{
routeToTab({ tabName: 'Documentation', tabParams: { editing: true } })}
>
diff --git a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/DescriptionEditorToolbar.tsx b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/DescriptionEditorToolbar.tsx
index 6128a5f277c85..07e197049ccc8 100644
--- a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/DescriptionEditorToolbar.tsx
+++ b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/DescriptionEditorToolbar.tsx
@@ -15,7 +15,7 @@ export const DescriptionEditorToolbar = ({ disableSave, onClose, onSave }: Descr
Back
-
+ Save
diff --git a/datahub-web-react/src/app/ingest/secret/SecretBuilderModal.tsx b/datahub-web-react/src/app/ingest/secret/SecretBuilderModal.tsx
index 539eef972608c..30f04d61b8fc9 100644
--- a/datahub-web-react/src/app/ingest/secret/SecretBuilderModal.tsx
+++ b/datahub-web-react/src/app/ingest/secret/SecretBuilderModal.tsx
@@ -40,6 +40,7 @@ export const SecretBuilderModal = ({ initialState, visible, onSubmit, onCancel }
Cancel
onSubmit?.(
@@ -71,6 +72,7 @@ export const SecretBuilderModal = ({ initialState, visible, onSubmit, onCancel }
Give your secret a name. This is what you'll use to reference the secret from your recipes.
{
- setIsCreatingSecret(true)}>
+ setIsCreatingSecret(true)}
+ >
Create new secret
diff --git a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js
index f52e4d3984a88..aeceaf99be889 100644
--- a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js
+++ b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js
@@ -17,8 +17,7 @@ describe("glossary sidebar navigation test", () => {
cy.waitTextVisible("Created Term Group!");
cy.waitTextVisible("Create Glossary Term");
cy.enterTextInTestId("create-glossary-entity-modal-name", glossaryTerm);
- cy.clickOptionWithTestId("glossary-entity-modal-create-button");
- cy.waitTextVisible("Created Glossary Term!");
+ cy.clickOptionWithTestId("glossary-entity-modal-create-button").wait(3000);
cy.get('[data-testid="glossary-browser-sidebar"]').contains(glossaryTerm).click().wait(3000);
cy.openThreeDotDropdown();
cy.clickOptionWithTestId("entity-menu-move-button")
diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js
index 83b66e2cb2549..5f9758a35ca0e 100644
--- a/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js
+++ b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js
@@ -10,20 +10,20 @@ describe("edit documentation and link to dataset", () => {
cy.visit(
"/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema"
);
- cy.get("[role='tab']").contains("Documentation").click();
+ cy.openEntityTab("Documentation");
cy.waitTextVisible("my hive dataset");
cy.waitTextVisible("Sample doc");
- cy.clickOptionWithText("Edit");
+ cy.clickOptionWithTestId("edit-documentation-button");
cy.focused().clear();
cy.focused().type(documentation_edited);
- cy.get("button").contains("Save").click();
+ cy.clickOptionWithTestId("description-editor-save-button");
cy.waitTextVisible("Description Updated");
cy.waitTextVisible(documentation_edited);
//return documentation to original state
- cy.clickOptionWithText("Edit");
+ cy.clickOptionWithTestId("edit-documentation-button");
cy.focused().clear().wait(1000);
cy.focused().type("my hive dataset");
- cy.get("button").contains("Save").click();
+ cy.clickOptionWithTestId("description-editor-save-button");
cy.waitTextVisible("Description Updated");
cy.waitTextVisible("my hive dataset");
});
@@ -33,21 +33,21 @@ describe("edit documentation and link to dataset", () => {
cy.visit(
"/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema"
);
- cy.get("[role='tab']").contains("Documentation").click();
+ cy.openEntityTab("Documentation");
cy.contains("Sample doc").trigger("mouseover", { force: true });
cy.get('[data-icon="delete"]').click();
cy.waitTextVisible("Link Removed");
- cy.get("button").contains("Add Link").click().wait(1000);
- cy.get('[role="dialog"] #addLinkForm_url').type(wrong_url);
+ cy.clickOptionWithTestId("add-link-button").wait(1000);
+ cy.enterTextInTestId("add-link-modal-url", wrong_url);
cy.waitTextVisible("This field must be a valid url.");
cy.focused().clear();
cy.waitTextVisible("A URL is required.");
- cy.focused().type(correct_url);
+ cy.enterTextInTestId("add-link-modal-url", correct_url);
cy.ensureTextNotPresent("This field must be a valid url.");
- cy.get("#addLinkForm_label").type("Sample doc");
- cy.get('[role="dialog"] button').contains("Add").click();
+ cy.enterTextInTestId("add-link-modal-label", "Sample doc");
+ cy.clickOptionWithTestId("add-link-modal-add-button");
cy.waitTextVisible("Link Added");
- cy.get("[role='tab']").contains("Documentation").click();
+ cy.openEntityTab("Documentation");
cy.get(`[href='${correct_url}']`).should("be.visible");
});
@@ -55,18 +55,18 @@ describe("edit documentation and link to dataset", () => {
cy.loginWithCredentials();
cy.visit("/domain/urn:li:domain:marketing/Entities");
cy.waitTextVisible("SampleCypressKafkaDataset");
- cy.get("button").contains("Add Link").click().wait(1000);
- cy.get('[role="dialog"] #addLinkForm_url').type(wrong_url);
+ cy.clickOptionWithTestId("add-link-button").wait(1000);
+ cy.enterTextInTestId("add-link-modal-url", wrong_url);
cy.waitTextVisible("This field must be a valid url.");
cy.focused().clear();
cy.waitTextVisible("A URL is required.");
- cy.focused().type(correct_url);
+ cy.enterTextInTestId("add-link-modal-url", correct_url);
cy.ensureTextNotPresent("This field must be a valid url.");
- cy.get("#addLinkForm_label").type("Sample doc");
- cy.get('[role="dialog"] button').contains("Add").click();
+ cy.enterTextInTestId("add-link-modal-label", "Sample doc");
+ cy.clickOptionWithTestId("add-link-modal-add-button");
cy.waitTextVisible("Link Added");
- cy.get("[role='tab']").contains("Documentation").click();
- cy.waitTextVisible("Edit");
+ cy.openEntityTab("Documentation");
+ cy.get("[data-testid='edit-documentation-button']").should("be.visible");
cy.get(`[href='${correct_url}']`).should("be.visible");
cy.contains("Sample doc").trigger("mouseover", { force: true });
cy.get('[data-icon="delete"]').click();
@@ -83,14 +83,14 @@ describe("edit documentation and link to dataset", () => {
cy.waitTextVisible("Foo field description has changed");
cy.focused().clear().wait(1000);
cy.focused().type(documentation_edited);
- cy.get("button").contains("Update").click();
+ cy.clickOptionWithTestId("description-modal-update-button");
cy.waitTextVisible("Updated!");
cy.waitTextVisible(documentation_edited);
cy.waitTextVisible("(edited)");
cy.get("tbody [data-icon='edit']").first().click({ force: true });
cy.focused().clear().wait(1000);
cy.focused().type("Foo field description has changed");
- cy.get("button").contains("Update").click();
+ cy.clickOptionWithTestId("description-modal-update-button");
cy.waitTextVisible("Updated!");
cy.waitTextVisible("Foo field description has changed");
cy.waitTextVisible("(edited)");
diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js b/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js
index 466bb2ef0757e..77fd63b9cae02 100644
--- a/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js
+++ b/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js
@@ -8,23 +8,24 @@ const ingestion_source_name = `ingestion source ${number}`;
describe("managing secrets for ingestion creation", () => {
it("create a secret, create ingestion source using a secret, remove a secret", () => {
+ // Navigate to the manage ingestion page → secrets
cy.loginWithCredentials();
- //navigate to the manage ingestion page → secrets
cy.goToIngestionPage();
- cy.clickOptionWithText("Secrets");
- //create a new secret
- cy.clickOptionWithText("Create new secret");
- cy.get('[role="dialog"]').contains("Create a new Secret").should("be.visible");
- cy.get('[role="dialog"] #name').type(`secretname${number}`);
- cy.get('[role="dialog"] #value').type(`secretvalue${number}`);
- cy.get('[role="dialog"] #description').type(`secretdescription${number}`);
- cy.get('#createSecretButton').click();
+ cy.openEntityTab("Secrets");
+
+ // Create a new secret
+ cy.clickOptionWithTestId("create-secret-button");
+ cy.enterTextInTestId('secret-modal-name-input', `secretname${number}`);
+ cy.enterTextInTestId('secret-modal-value-input', `secretvalue${number}`);
+ cy.enterTextInTestId('secret-modal-description-input', `secretdescription${number}`);
+ cy.clickOptionWithTestId("secret-modal-create-button");
cy.waitTextVisible("Successfully created Secret!");
cy.waitTextVisible(`secretname${number}`);
- cy.waitTextVisible(`secretdescription${number}`).wait(5000)//prevent issue with missing secret
- //create an ingestion source using a secret
+ cy.waitTextVisible(`secretdescription${number}`).wait(5000)
+
+ // Create an ingestion source using a secret
cy.goToIngestionPage();
- cy.clickOptionWithText("Create new source");
+ cy.get("#ingestion-create-source").click();
cy.clickOptionWithText("Snowflake");
cy.waitTextVisible("Snowflake Recipe");
cy.get("#account_id").type(accound_id);
@@ -40,11 +41,12 @@ describe("managing secrets for ingestion creation", () => {
cy.waitTextVisible("Give this ingestion source a name.");
cy.get('[data-testid="source-name-input"]').type(ingestion_source_name);
cy.get("button").contains("Save").click();
- cy.waitTextVisible("Successfully created ingestion source!").wait(5000)//prevent issue with missing form data
+ cy.waitTextVisible("Successfully created ingestion source!").wait(5000)
cy.waitTextVisible(ingestion_source_name);
cy.get("button").contains("Pending...").should("be.visible");
- //remove a secret
- cy.clickOptionWithText("Secrets");
+
+ // Remove a secret
+ cy.openEntityTab("Secrets");
cy.waitTextVisible(`secretname${number}`);
cy.get('[data-icon="delete"]').first().click();
cy.waitTextVisible("Confirm Secret Removal");
@@ -52,14 +54,16 @@ describe("managing secrets for ingestion creation", () => {
cy.waitTextVisible("Removed secret.");
cy.ensureTextNotPresent(`secretname${number}`);
cy.ensureTextNotPresent(`secretdescription${number}`);
- //remove ingestion source
+
+ // Remove ingestion source
cy.goToIngestionPage();
cy.get('[data-testid="delete-button"]').first().click();
cy.waitTextVisible("Confirm Ingestion Source Removal");
cy.get("button").contains("Yes").click();
cy.waitTextVisible("Removed ingestion source.");
cy.ensureTextNotPresent(ingestion_source_name)
- //verify secret is not present during ingestion source creation for password dropdown
+
+ // Verify secret is not present during ingestion source creation for password dropdown
cy.clickOptionWithText("Create new source");
cy.clickOptionWithText("Snowflake");
cy.waitTextVisible("Snowflake Recipe");
@@ -68,13 +72,13 @@ describe("managing secrets for ingestion creation", () => {
cy.get("#username").type(username);
cy.get("#password").click().wait(1000);
cy.ensureTextNotPresent(`secretname${number}`);
- //verify secret can be added during ingestion source creation and used successfully
+
+ // Verify secret can be added during ingestion source creation and used successfully
cy.clickOptionWithText("Create Secret");
- cy.get('[role="dialog"]').contains("Create a new Secret").should("be.visible");
- cy.get('[role="dialog"] #name').type(`secretname${number}`);
- cy.get('[role="dialog"] #value').type(`secretvalue${number}`);
- cy.get('[role="dialog"] #description').type(`secretdescription${number}`);
- cy.get('#createSecretButton').click();
+ cy.enterTextInTestId('secret-modal-name-input', `secretname${number}`)
+ cy.enterTextInTestId('secret-modal-value-input', `secretvalue${number}`)
+ cy.enterTextInTestId('secret-modal-description-input', `secretdescription${number}`)
+ cy.clickOptionWithTestId("secret-modal-create-button");
cy.waitTextVisible("Created secret!");
cy.get("#role").type(role);
cy.get("button").contains("Next").click();
@@ -86,6 +90,7 @@ describe("managing secrets for ingestion creation", () => {
cy.waitTextVisible("Successfully created ingestion source!").wait(5000)//prevent issue with missing form data
cy.waitTextVisible(ingestion_source_name);
cy.get("button").contains("Pending...").should("be.visible");
+
//Remove ingestion source and secret
cy.goToIngestionPage();
cy.get('[data-testid="delete-button"]').first().click();
From 63599c95553b89304b656efb2c208c9084d60717 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Sat, 21 Oct 2023 03:17:28 -0700
Subject: [PATCH 07/11] fix(search): fix mapping builder bug (#9062)
---
.../search/elasticsearch/indexbuilder/MappingsBuilder.java | 2 +-
.../timeseries/search/TimeseriesAspectServiceTestBase.java | 6 ++++--
.../io/datahubproject/test/search/SearchTestContainer.java | 2 +-
smoke-test/tests/containers/containers_test.py | 1 +
4 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java
index 1edc77bbd214c..35cef71edd953 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilder.java
@@ -133,7 +133,7 @@ private static Map getMappingsForField(@Nonnull final Searchable
} else if (fieldType == FieldType.DATETIME) {
mappingForField.put(TYPE, ESUtils.DATE_FIELD_TYPE);
} else if (fieldType == FieldType.OBJECT) {
- mappingForField.put(TYPE, ESUtils.DATE_FIELD_TYPE);
+ mappingForField.put(TYPE, ESUtils.OBJECT_FIELD_TYPE);
} else {
log.info("FieldType {} has no mappings implemented", fieldType);
}
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java
index f9b8f84b10ad2..b19d2026fbfc4 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java
@@ -889,15 +889,17 @@ public void testCountByFilterAfterDelete() throws InterruptedException {
@Test(groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"})
public void testGetIndexSizes() {
List result = _elasticSearchTimeseriesAspectService.getIndexSizes();
+ //CHECKSTYLE:OFF
/*
Example result:
{aspectName=testentityprofile, sizeMb=52.234, indexName=es_timeseries_aspect_service_test_testentity_testentityprofileaspect_v1, entityName=testentity}
{aspectName=testentityprofile, sizeMb=0.208, indexName=es_timeseries_aspect_service_test_testentitywithouttests_testentityprofileaspect_v1, entityName=testentitywithouttests}
*/
// There may be other indices in there from other tests, so just make sure that index for entity + aspect is in there
- assertTrue(result.size() > 1);
+ //CHECKSTYLE:ON
+ assertTrue(result.size() > 0);
assertTrue(
result.stream().anyMatch(idxSizeResult -> idxSizeResult.getIndexName().equals(
- "es_timeseries_aspect_service_test_testentitywithouttests_testentityprofileaspect_v1")));
+ "es_timeseries_aspect_service_test_testentity_testentityprofileaspect_v1")));
}
}
diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java
index 67e1ee368f513..4c1555fc510e6 100644
--- a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java
+++ b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestContainer.java
@@ -5,7 +5,7 @@
import java.time.Duration;
public interface SearchTestContainer {
- String SEARCH_JAVA_OPTS = "-Xms64m -Xmx384m -XX:MaxDirectMemorySize=368435456";
+ String SEARCH_JAVA_OPTS = "-Xms446m -Xmx446m -XX:MaxDirectMemorySize=368435456";
Duration STARTUP_TIMEOUT = Duration.ofMinutes(5); // usually < 1min
GenericContainer> startContainer();
diff --git a/smoke-test/tests/containers/containers_test.py b/smoke-test/tests/containers/containers_test.py
index 05a45239dabf8..227645a87d30a 100644
--- a/smoke-test/tests/containers/containers_test.py
+++ b/smoke-test/tests/containers/containers_test.py
@@ -227,6 +227,7 @@ def test_update_container(frontend_session, ingest_cleanup_data):
"ownerUrn": new_owner,
"resourceUrn": container_urn,
"ownerEntityType": "CORP_USER",
+ "ownershipTypeUrn": "urn:li:ownershipType:__system__technical_owner"
}
},
}
From 86e0023a4e158467130f7337478a48bf98fb344b Mon Sep 17 00:00:00 2001
From: Pedro Silva
Date: Sat, 21 Oct 2023 16:20:59 +0100
Subject: [PATCH 08/11] feat(ingestion): Adds more advanced configurations for
runtime debugging (#8998)
---
.../ingest/IngestionResolverUtils.java | 10 ++
...eateIngestionExecutionRequestResolver.java | 3 +
.../source/UpsertIngestionSourceResolver.java | 10 ++
.../src/main/resources/ingestion.graphql | 10 ++
.../UpsertIngestionSourceResolverTest.java | 2 +-
.../app/ingest/source/IngestionSourceList.tsx | 8 +-
.../ingest/source/builder/NameSourceStep.tsx | 123 +++++++++++++++++-
.../src/app/ingest/source/builder/types.ts | 17 +++
.../src/graphql/ingestion.graphql | 8 ++
docker/build.gradle | 12 +-
docs/ui-ingestion.md | 20 ++-
.../docs/dev_guides/profiling_ingestions.md | 39 ++++++
.../TimeseriesAspectServiceTestBase.java | 6 +-
.../test/search/SearchTestContainer.java | 2 +
.../ingestion/DataHubIngestionSourceInfo.pdl | 13 +-
15 files changed, 267 insertions(+), 16 deletions(-)
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java
index 7db0b6f826a04..1140c031f1d35 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java
@@ -5,6 +5,7 @@
import com.linkedin.datahub.graphql.generated.IngestionConfig;
import com.linkedin.datahub.graphql.generated.IngestionSchedule;
import com.linkedin.datahub.graphql.generated.IngestionSource;
+import com.linkedin.datahub.graphql.generated.StringMapEntry;
import com.linkedin.datahub.graphql.generated.StructuredReport;
import com.linkedin.datahub.graphql.types.common.mappers.StringMapMapper;
import com.linkedin.entity.EntityResponse;
@@ -21,6 +22,7 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
+import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
@@ -143,6 +145,14 @@ public static IngestionConfig mapIngestionSourceConfig(final DataHubIngestionSou
result.setVersion(config.getVersion());
result.setExecutorId(config.getExecutorId());
result.setDebugMode(config.isDebugMode());
+ if (config.getExtraArgs() != null) {
+ List extraArgs = config.getExtraArgs()
+ .keySet()
+ .stream()
+ .map(key -> new StringMapEntry(key, config.getExtraArgs().get(key)))
+ .collect(Collectors.toList());
+ result.setExtraArgs(extraArgs);
+ }
return result;
}
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java
index e5064e6620526..ea20b837e0a1f 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java
@@ -117,6 +117,9 @@ public CompletableFuture get(final DataFetchingEnvironment environment)
if (ingestionSourceInfo.getConfig().hasDebugMode()) {
debugMode = ingestionSourceInfo.getConfig().isDebugMode() ? "true" : "false";
}
+ if (ingestionSourceInfo.getConfig().hasExtraArgs()) {
+ arguments.putAll(ingestionSourceInfo.getConfig().getExtraArgs());
+ }
arguments.put(DEBUG_MODE_ARG_NAME, debugMode);
execInput.setArgs(new StringMap(arguments));
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java
index 2ce394ad5ba84..68e334bd976f8 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolver.java
@@ -1,10 +1,12 @@
package com.linkedin.datahub.graphql.resolvers.ingest.source;
import com.linkedin.common.urn.Urn;
+import com.linkedin.data.template.StringMap;
import com.linkedin.datahub.graphql.QueryContext;
import com.linkedin.datahub.graphql.exception.AuthorizationException;
import com.linkedin.datahub.graphql.exception.DataHubGraphQLErrorCode;
import com.linkedin.datahub.graphql.exception.DataHubGraphQLException;
+import com.linkedin.datahub.graphql.generated.StringMapEntryInput;
import com.linkedin.datahub.graphql.generated.UpdateIngestionSourceConfigInput;
import com.linkedin.datahub.graphql.generated.UpdateIngestionSourceInput;
import com.linkedin.datahub.graphql.generated.UpdateIngestionSourceScheduleInput;
@@ -17,6 +19,8 @@
import com.linkedin.mxe.MetadataChangeProposal;
import graphql.schema.DataFetcher;
import graphql.schema.DataFetchingEnvironment;
+import java.util.Map;
+import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import java.net.URISyntaxException;
@@ -108,6 +112,12 @@ private DataHubIngestionSourceConfig mapConfig(final UpdateIngestionSourceConfig
if (input.getDebugMode() != null) {
result.setDebugMode(input.getDebugMode());
}
+ if (input.getExtraArgs() != null) {
+ Map extraArgs = input.getExtraArgs()
+ .stream()
+ .collect(Collectors.toMap(StringMapEntryInput::getKey, StringMapEntryInput::getValue));
+ result.setExtraArgs(new StringMap(extraArgs));
+ }
return result;
}
diff --git a/datahub-graphql-core/src/main/resources/ingestion.graphql b/datahub-graphql-core/src/main/resources/ingestion.graphql
index 69c8aff124583..21f9fb2633119 100644
--- a/datahub-graphql-core/src/main/resources/ingestion.graphql
+++ b/datahub-graphql-core/src/main/resources/ingestion.graphql
@@ -332,6 +332,11 @@ type IngestionConfig {
Advanced: Whether or not to run ingestion in debug mode
"""
debugMode: Boolean
+
+ """
+ Advanced: Extra arguments for the ingestion run.
+ """
+ extraArgs: [StringMapEntry!]
}
"""
@@ -483,6 +488,11 @@ input UpdateIngestionSourceConfigInput {
Whether or not to run ingestion in debug mode
"""
debugMode: Boolean
+
+ """
+ Extra arguments for the ingestion run.
+ """
+ extraArgs: [StringMapEntryInput!]
}
"""
diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java
index 2538accc694fb..16d8da9169a8f 100644
--- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java
+++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/source/UpsertIngestionSourceResolverTest.java
@@ -26,7 +26,7 @@ public class UpsertIngestionSourceResolverTest {
"Test source",
"mysql", "Test source description",
new UpdateIngestionSourceScheduleInput("* * * * *", "UTC"),
- new UpdateIngestionSourceConfigInput("my test recipe", "0.8.18", "executor id", false)
+ new UpdateIngestionSourceConfigInput("my test recipe", "0.8.18", "executor id", false, null)
);
@Test
diff --git a/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx b/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx
index 0e341a5ff3a79..13af19b0b6ac2 100644
--- a/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx
+++ b/datahub-web-react/src/app/ingest/source/IngestionSourceList.tsx
@@ -15,7 +15,7 @@ import { Message } from '../../shared/Message';
import TabToolbar from '../../entity/shared/components/styled/TabToolbar';
import { IngestionSourceBuilderModal } from './builder/IngestionSourceBuilderModal';
import { addToListIngestionSourcesCache, CLI_EXECUTOR_ID, removeFromListIngestionSourcesCache } from './utils';
-import { DEFAULT_EXECUTOR_ID, SourceBuilderState } from './builder/types';
+import { DEFAULT_EXECUTOR_ID, SourceBuilderState, StringMapEntryInput } from './builder/types';
import { IngestionSource, UpdateIngestionSourceInput } from '../../../types.generated';
import { SearchBar } from '../../search/SearchBar';
import { useEntityRegistry } from '../../useEntityRegistry';
@@ -173,6 +173,11 @@ export const IngestionSourceList = () => {
setFocusSourceUrn(undefined);
};
+ // Rebuilds each entry as a plain { key, value } object; a null or undefined list yields [].
+ const formatExtraArgs = (extraArgs): StringMapEntryInput[] => {
+     return extraArgs == null ? [] : extraArgs.map(({ key, value }) => ({ key, value }));
+ };
+
const createOrUpdateIngestionSource = (
input: UpdateIngestionSourceInput,
resetState: () => void,
@@ -294,6 +299,7 @@ export const IngestionSourceList = () => {
(recipeBuilderState.config?.executorId as string)) ||
DEFAULT_EXECUTOR_ID,
debugMode: recipeBuilderState.config?.debugMode || false,
+ extraArgs: formatExtraArgs(recipeBuilderState.config?.extraArgs || []),
},
schedule: recipeBuilderState.schedule && {
interval: recipeBuilderState.schedule?.interval as string,
diff --git a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx
index 992ebff643c31..f4c048bcaf0d2 100644
--- a/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx
+++ b/datahub-web-react/src/app/ingest/source/builder/NameSourceStep.tsx
@@ -1,7 +1,7 @@
import { Button, Checkbox, Collapse, Form, Input, Typography } from 'antd';
import React from 'react';
import styled from 'styled-components';
-import { SourceBuilderState, StepProps } from './types';
+import { SourceBuilderState, StepProps, StringMapEntryInput } from './types';
const ControlsContainer = styled.div`
display: flex;
@@ -13,6 +13,10 @@ const SaveButton = styled(Button)`
margin-right: 15px;
`;
+const ExtraEnvKey = 'extra_env_vars'; // extraArgs key for the extra environment variables value
+const ExtraReqKey = 'extra_pip_requirements'; // extraArgs key for the extra pip requirements value
+const ExtraPluginKey = 'extra_pip_plugins'; // extraArgs key for the extra DataHub plugins value
+
export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps) => {
const setName = (stagedName: string) => {
const newState: SourceBuilderState = {
@@ -55,6 +59,90 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps)
updateState(newState);
};
+ /**
+  * Looks up the first `ExtraEnvKey` entry in `state.config.extraArgs`.
+  * Yields that entry's value, or '' when the list is absent or holds no such entry.
+  */
+ const retrieveExtraEnvs = () => {
+     const match = (state.config?.extraArgs || []).find((entry) => entry.key === ExtraEnvKey);
+     return match ? match.value : '';
+ };
+
+ /**
+  * Upserts the `ExtraEnvKey` entry of `state.config.extraArgs` with `envs`, then passes the new state to `updateState`.
+  */
+ const setExtraEnvs = (envs: string) => {
+     // Copy first so the array still referenced by the current `state` is never mutated in place.
+     const extraArgs: StringMapEntryInput[] = [...(state.config?.extraArgs || [])];
+     const value = { key: ExtraEnvKey, value: envs };
+     const indxOfEnvVars = extraArgs.findIndex((entry) => entry.key === ExtraEnvKey);
+     if (indxOfEnvVars > -1) {
+         extraArgs[indxOfEnvVars] = value;
+     } else {
+         extraArgs.push(value);
+     }
+     updateState({
+         ...state,
+         config: { ...state.config, extraArgs },
+     });
+ };
+
+ /**
+  * Looks up the first `ExtraPluginKey` entry in `state.config.extraArgs`.
+  * Yields that entry's value, or '' when the list is absent or holds no such entry.
+  */
+ const retrieveExtraDataHubPlugins = () => {
+     const match = (state.config?.extraArgs || []).find((entry) => entry.key === ExtraPluginKey);
+     return match ? match.value : '';
+ };
+
+ /**
+  * Upserts the `ExtraPluginKey` entry of `state.config.extraArgs` with `plugins`, then passes the new state to `updateState`.
+  */
+ const setExtraDataHubPlugins = (plugins: string) => {
+     // Copy first so the array still referenced by the current `state` is never mutated in place.
+     const extraArgs: StringMapEntryInput[] = [...(state.config?.extraArgs || [])];
+     const value = { key: ExtraPluginKey, value: plugins };
+     const indxOfPlugins = extraArgs.findIndex((entry) => entry.key === ExtraPluginKey);
+     if (indxOfPlugins > -1) {
+         extraArgs[indxOfPlugins] = value;
+     } else {
+         extraArgs.push(value);
+     }
+     updateState({
+         ...state,
+         config: { ...state.config, extraArgs },
+     });
+ };
+
+ /**
+  * Looks up the first `ExtraReqKey` entry in `state.config.extraArgs`.
+  * Yields that entry's value, or '' when the list is absent or holds no such entry.
+  */
+ const retrieveExtraReqs = () => {
+     const match = (state.config?.extraArgs || []).find((entry) => entry.key === ExtraReqKey);
+     return match ? match.value : '';
+ };
+
+ /**
+  * Upserts the `ExtraReqKey` entry of `state.config.extraArgs` with `reqs`, then passes the new state to `updateState`.
+  */
+ const setExtraReqs = (reqs: string) => {
+     // Copy first so the array still referenced by the current `state` is never mutated in place.
+     const extraArgs: StringMapEntryInput[] = [...(state.config?.extraArgs || [])];
+     const value = { key: ExtraReqKey, value: reqs };
+     const indxOfReqs = extraArgs.findIndex((entry) => entry.key === ExtraReqKey);
+     if (indxOfReqs > -1) {
+         extraArgs[indxOfReqs] = value;
+     } else {
+         extraArgs.push(value);
+     }
+     updateState({
+         ...state,
+         config: { ...state.config, extraArgs },
+     });
+ };
+
const onClickCreate = (shouldRun?: boolean) => {
if (state.name !== undefined && state.name.length > 0) {
submit(shouldRun);
@@ -116,6 +204,39 @@ export const NameSourceStep = ({ state, updateState, prev, submit }: StepProps)
onChange={(event) => setDebugMode(event.target.checked)}
/>
+ Extra Enviroment Variables}>
+
+ Advanced: Set extra environment variables to an ingestion execution
+
+ setExtraEnvs(event.target.value)}
+ />
+
+ Extra DataHub plugins}>
+
+ Advanced: Set extra DataHub plugins for an ingestion execution
+
+ setExtraDataHubPlugins(event.target.value)}
+ />
+
+ Extra Pip Libraries}>
+
+ Advanced: Add extra pip libraries for an ingestion execution
+
+ setExtraReqs(event.target.value)}
+ />
+
diff --git a/datahub-web-react/src/app/ingest/source/builder/types.ts b/datahub-web-react/src/app/ingest/source/builder/types.ts
index cfe0f27ae7dbe..2df467b7beba1 100644
--- a/datahub-web-react/src/app/ingest/source/builder/types.ts
+++ b/datahub-web-react/src/app/ingest/source/builder/types.ts
@@ -34,6 +34,18 @@ export type StepProps = {
ingestionSources: SourceConfig[];
};
+export type StringMapEntryInput = {
+ /**
+ * The key of the map entry
+ */
+ key: string;
+
+ /**
+ * The value of the map entry
+ */
+ value: string;
+};
+
/**
* The object represents the state of the Ingestion Source Builder form.
*/
@@ -91,5 +103,10 @@ export interface SourceBuilderState {
* Advanced: Whether or not to run this ingestion source in debug mode
*/
debugMode?: boolean | null;
+
+ /**
+ * Advanced: Extra arguments for the ingestion run.
+ */
+ extraArgs?: StringMapEntryInput[] | null;
};
}
diff --git a/datahub-web-react/src/graphql/ingestion.graphql b/datahub-web-react/src/graphql/ingestion.graphql
index c127e9ec03f9a..1767fe34bfef0 100644
--- a/datahub-web-react/src/graphql/ingestion.graphql
+++ b/datahub-web-react/src/graphql/ingestion.graphql
@@ -12,6 +12,10 @@ query listIngestionSources($input: ListIngestionSourcesInput!) {
version
executorId
debugMode
+ extraArgs {
+ key
+ value
+ }
}
schedule {
interval
@@ -51,6 +55,10 @@ query getIngestionSource($urn: String!, $runStart: Int, $runCount: Int) {
version
executorId
debugMode
+ extraArgs {
+ key
+ value
+ }
}
schedule {
interval
diff --git a/docker/build.gradle b/docker/build.gradle
index c8fdbc86b18b7..56634a5fe0c67 100644
--- a/docker/build.gradle
+++ b/docker/build.gradle
@@ -97,10 +97,20 @@ task quickstartDebug(type: Exec, dependsOn: ':metadata-ingestion:install') {
dependsOn(debug_modules.collect { it + ':dockerTagDebug' })
shouldRunAfter ':metadata-ingestion:clean', 'quickstartNuke'
- environment "DATAHUB_PRECREATE_TOPICS", "true"
environment "DATAHUB_TELEMETRY_ENABLED", "false"
environment "DOCKER_COMPOSE_BASE", "file://${rootProject.projectDir}"
+ // Elastic
+ // environment "DATAHUB_SEARCH_IMAGE", 'elasticsearch'
+ // environment "DATAHUB_SEARCH_TAG", '7.10.1'
+
+ // OpenSearch
+ environment "DATAHUB_SEARCH_IMAGE", 'opensearchproject/opensearch'
+ environment "DATAHUB_SEARCH_TAG", '2.9.0'
+ environment "XPACK_SECURITY_ENABLED", 'plugins.security.disabled=true'
+ environment "USE_AWS_ELASTICSEARCH", 'true'
+
+
def cmd = [
'source ../metadata-ingestion/venv/bin/activate && ',
'datahub docker quickstart',
diff --git a/docs/ui-ingestion.md b/docs/ui-ingestion.md
index db2007e1e19a9..438ddd8823b7e 100644
--- a/docs/ui-ingestion.md
+++ b/docs/ui-ingestion.md
@@ -1,5 +1,12 @@
+import FeatureAvailability from '@site/src/components/FeatureAvailability';
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
# Ingestion
+
+
## Introduction
Starting in version `0.8.25`, DataHub supports creating, configuring, scheduling, & executing batch metadata ingestion using the DataHub user interface. This makes
@@ -173,28 +180,29 @@ Finally, give your Ingestion Source a name.
Once you're happy with your configurations, click 'Done' to save your changes.
-##### Advanced: Running with a specific CLI version
+##### Advanced ingestion configs:
-DataHub comes pre-configured to use the latest version of the DataHub CLI ([acryl-datahub](https://pypi.org/project/acryl-datahub/)) that is compatible
+DataHub's Managed Ingestion UI comes pre-configured to use the latest version of the DataHub CLI ([acryl-datahub](https://pypi.org/project/acryl-datahub/)) that is compatible
with the server. However, you can override the default package version using the 'Advanced' source configurations.
To do so, simply click 'Advanced', then change the 'CLI Version' text box to contain the exact version
of the DataHub CLI you'd like to use.
-
_Pinning the CLI version to version `0.8.23.2`_
+Other advanced options include specifying **environment variables**, **DataHub plugins** or **python packages at runtime**.
+
Once you're happy with your changes, simply click 'Done' to save.
You can upload and even update recipes using the cli as mentioned in the [cli documentation for uploading ingestion recipes](./cli.md#ingest-deploy).
-An example execution would look something like:
+An example execution for a given `recipe.yaml` file would look something like:
```bash
datahub ingest deploy --name "My Test Ingestion Source" --schedule "5 * * * *" --time-zone "UTC" -c recipe.yaml
@@ -330,8 +338,8 @@ for the `datahub-actions` container and running `docker logs `.
There are valid cases for ingesting metadata without the UI-based ingestion scheduler. For example,
- You have written a custom ingestion Source
-- Your data sources are not reachable on the network where DataHub is deployed
-- Your ingestion source requires context from a local filesystem (e.g. input files, environment variables, etc)
+- Your data sources are not reachable on the network where DataHub is deployed. Managed DataHub users can use a [remote executor](managed-datahub/operator-guide/setting-up-remote-ingestion-executor-on-aws.md) for remote UI-based ingestion.
+- Your ingestion source requires context from a local filesystem (e.g. input files)
- You want to distribute metadata ingestion among multiple producers / environments
### How do I attach policies to the actions pod to give it permissions to pull metadata from various sources?
diff --git a/metadata-ingestion/docs/dev_guides/profiling_ingestions.md b/metadata-ingestion/docs/dev_guides/profiling_ingestions.md
index d876d99b494f8..77cc2f456aa2d 100644
--- a/metadata-ingestion/docs/dev_guides/profiling_ingestions.md
+++ b/metadata-ingestion/docs/dev_guides/profiling_ingestions.md
@@ -13,6 +13,35 @@ This page documents how to perform memory profiles of ingestion runs.
It is useful when trying to size the amount of resources necessary to ingest some source or when developing new features or sources.
## How to use
+
+
+
+
+Create an ingestion as specified in the [Ingestion guide](../../../docs/ui-ingestion.md).
+
+Add a flag to your ingestion recipe to generate a memray memory dump of your ingestion:
+```yaml
+source:
+ ...
+
+sink:
+ ...
+
+flags:
+ generate_memory_profiles: ""
+```
+
+In the final panel, under the advanced section, add the `debug` DataHub package under the **Extra DataHub Plugins** section,
+as seen below:
+
+