Skip to content

Commit

Permalink
Merge branch 'master' into powerbi/fix-credentials-timeout
Browse files Browse the repository at this point in the history
  • Loading branch information
elish7lapid authored Sep 13, 2023
2 parents 3aa6560 + 785ab77 commit 97645e6
Show file tree
Hide file tree
Showing 45 changed files with 1,283 additions and 348 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -809,7 +809,7 @@ jobs:
DATAHUB_VERSION: ${{ needs.setup.outputs.unique_tag }}
DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_INGESTION_IMAGE }}
ACTIONS_VERSION: ${{ needs.datahub_ingestion_slim_build.outputs.tag }}
ACTIONS_EXTRA_PACKAGES: 'acryl-datahub-actions[executor] acryl-datahub-actions'
ACTIONS_EXTRA_PACKAGES: 'acryl-datahub-actions[executor]==0.0.13 acryl-datahub-actions==0.0.13 acryl-datahub==0.10.5'
ACTIONS_CONFIG: 'https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml'
run: |
./smoke-test/run-quickstart.sh
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/documentation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ jobs:
- uses: actions/setup-python@v4
with:
python-version: "3.10"
cache: pip
- name: Install Python dependencies
run: ./metadata-ingestion/scripts/install_deps.sh
- name: Build Docs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ public class RestoreIndices implements Upgrade {
public static final String URN_ARG_NAME = "urn";
public static final String URN_LIKE_ARG_NAME = "urnLike";

public static final String STARTING_OFFSET_ARG_NAME = "startingOffset";

private final List<UpgradeStep> _steps;

public RestoreIndices(final Database server, final EntityService entityService,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ public class SendMAEStep implements UpgradeStep {

private static final int DEFAULT_BATCH_SIZE = 1000;
private static final long DEFAULT_BATCH_DELAY_MS = 250;

private static final int DEFAULT_STARTING_OFFSET = 0;
private static final int DEFAULT_THREADS = 1;

private final Database _server;
Expand Down Expand Up @@ -83,6 +85,7 @@ private RestoreIndicesArgs getArgs(UpgradeContext context) {
result.batchSize = getBatchSize(context.parsedArgs());
result.numThreads = getThreadCount(context.parsedArgs());
result.batchDelayMs = getBatchDelayMs(context.parsedArgs());
result.start = getStartingOffset(context.parsedArgs());
if (containsKey(context.parsedArgs(), RestoreIndices.ASPECT_NAME_ARG_NAME)) {
result.aspectName = context.parsedArgs().get(RestoreIndices.ASPECT_NAME_ARG_NAME).get();
}
Expand Down Expand Up @@ -124,7 +127,7 @@ public Function<UpgradeContext, UpgradeStepResult> executable() {
final int rowCount = getRowCount(args);
context.report().addLine(String.format("Found %s latest aspects in aspects table in %.2f minutes.",
rowCount, (float) (System.currentTimeMillis() - startTime) / 1000 / 60));
int start = 0;
int start = args.start;

List<Future<RestoreIndicesResult>> futures = new ArrayList<>();
startTime = System.currentTimeMillis();
Expand Down Expand Up @@ -186,6 +189,10 @@ private int getBatchSize(final Map<String, Optional<String>> parsedArgs) {
return getInt(parsedArgs, DEFAULT_BATCH_SIZE, RestoreIndices.BATCH_SIZE_ARG_NAME);
}

private int getStartingOffset(final Map<String, Optional<String>> parsedArgs) {
return getInt(parsedArgs, DEFAULT_STARTING_OFFSET, RestoreIndices.STARTING_OFFSET_ARG_NAME);
}

private long getBatchDelayMs(final Map<String, Optional<String>> parsedArgs) {
long resolvedBatchDelayMs = DEFAULT_BATCH_DELAY_MS;
if (containsKey(parsedArgs, RestoreIndices.BATCH_DELAY_MS_ARG_NAME)) {
Expand Down
2 changes: 1 addition & 1 deletion docker/datahub-ingestion-base/smoke.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get install -y openjdk-11-jdk
COPY . /datahub-src
ARG RELEASE_VERSION
RUN cd /datahub-src/metadata-ingestion && \
sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \
sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \
cat src/datahub/__init__.py && \
cd ../ && \
./gradlew :metadata-ingestion:installAll
4 changes: 2 additions & 2 deletions docker/datahub-ingestion/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ COPY ./metadata-ingestion-modules/airflow-plugin /datahub-ingestion/airflow-plug

ARG RELEASE_VERSION
WORKDIR /datahub-ingestion
RUN sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \
sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" airflow-plugin/src/datahub_airflow_plugin/__init__.py && \
RUN sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \
sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" airflow-plugin/src/datahub_airflow_plugin/__init__.py && \
cat src/datahub/__init__.py && \
chown -R datahub /datahub-ingestion

Expand Down
2 changes: 1 addition & 1 deletion docker/datahub-ingestion/Dockerfile-slim-only
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ COPY ./metadata-ingestion /datahub-ingestion

ARG RELEASE_VERSION
WORKDIR /datahub-ingestion
RUN sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \
RUN sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py && \
cat src/datahub/__init__.py && \
chown -R datahub /datahub-ingestion

Expand Down
4 changes: 4 additions & 0 deletions docs-website/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@ task yarnBuild(type: YarnTask, dependsOn: [yarnLint, yarnGenerate, downloadHisto
outputs.dir("dist")
// tell gradle to apply the build cache
outputs.cacheIf { true }
// See https://stackoverflow.com/questions/53230823/fatal-error-ineffective-mark-compacts-near-heap-limit-allocation-failed-java
// and https://github.com/facebook/docusaurus/issues/8329.
// TODO: As suggested in https://github.com/facebook/docusaurus/issues/4765, try switching to swc-loader.
environment = ['NODE_OPTIONS': '--max-old-space-size=10248']
args = ['run', 'build']

}
Expand Down
5 changes: 4 additions & 1 deletion docs-website/sphinx/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ $(VENV_SENTINEL): requirements.txt
$(VENV_DIR)/bin/pip install -r requirements.txt
touch $(VENV_SENTINEL)

.PHONY: help html doctest linkcheck clean serve md
.PHONY: help html doctest linkcheck clean clean_all serve md

# Not using Python's http.server because it enables caching headers.
serve:
Expand All @@ -35,3 +35,6 @@ md: html
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
html doctest linkcheck clean: venv Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

clean_all: clean
-rm -rf $(VENV_DIR)
2 changes: 1 addition & 1 deletion docs-website/sphinx/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-e ../../metadata-ingestion[datahub-rest]
-e ../../metadata-ingestion[datahub-rest,sql-parsing]
beautifulsoup4==4.11.2
Sphinx==6.1.3
sphinx-click==4.4.0
Expand Down
1 change: 1 addition & 0 deletions docs-website/versions.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
[
"0.11.0",
"0.10.5"
]
18 changes: 11 additions & 7 deletions docs-website/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -2986,6 +2986,13 @@
dependencies:
"@types/node" "*"

"@types/websocket@^1.0.3":
version "1.0.6"
resolved "https://registry.yarnpkg.com/@types/websocket/-/websocket-1.0.6.tgz#ec8dce5915741632ac3a4b1f951b6d4156e32d03"
integrity sha512-JXkliwz93B2cMWOI1ukElQBPN88vMg3CruvW4KVSKpflt3NyNCJImnhIuB/f97rG7kakqRJGFiwkA895Kn02Dg==
dependencies:
"@types/node" "*"

"@types/ws@^8.5.5":
version "8.5.5"
resolved "https://registry.yarnpkg.com/@types/ws/-/ws-8.5.5.tgz#af587964aa06682702ee6dcbc7be41a80e4b28eb"
Expand Down Expand Up @@ -7053,7 +7060,6 @@ node-forge@^1:
resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-1.3.1.tgz#be8da2af243b2417d5f646a770663a92b7e9ded3"
integrity sha512-dPEtOeMvF9VMcYV/1Wb8CPoVAXtp6MKMlcbAt4ddqmGqUJ6fQZFXkNZNkNlfevtNkGtaSoXf/vNNNSvgrdXwtA==


node-gyp-build@^4.3.0:
version "4.6.1"
resolved "https://registry.yarnpkg.com/node-gyp-build/-/node-gyp-build-4.6.1.tgz#24b6d075e5e391b8d5539d98c7fc5c210cac8a3e"
Expand Down Expand Up @@ -9903,6 +9909,10 @@ use-sidecar@^1.1.2:
detect-node-es "^1.1.0"
tslib "^2.0.0"

use-sync-external-store@^1.2.0:
version "1.2.0"
resolved "https://registry.yarnpkg.com/use-sync-external-store/-/use-sync-external-store-1.2.0.tgz#7dbefd6ef3fe4e767a0cf5d7287aacfb5846928a"
integrity sha512-eEgnFxGQ1Ife9bzYs6VLi8/4X6CObHMw9Qr9tPY43iKwsPw8xE8+EFsf/2cFZ5S3esXgpWgtSCtLNS41F+sKPA==

utf-8-validate@^5.0.2:
version "5.0.10"
Expand All @@ -9911,12 +9921,6 @@ utf-8-validate@^5.0.2:
dependencies:
node-gyp-build "^4.3.0"

use-sync-external-store@^1.2.0:
version "1.2.0"
resolved "https://registry.yarnpkg.com/use-sync-external-store/-/use-sync-external-store-1.2.0.tgz#7dbefd6ef3fe4e767a0cf5d7287aacfb5846928a"
integrity sha512-eEgnFxGQ1Ife9bzYs6VLi8/4X6CObHMw9Qr9tPY43iKwsPw8xE8+EFsf/2cFZ5S3esXgpWgtSCtLNS41F+sKPA==


util-deprecate@^1.0.1, util-deprecate@^1.0.2, util-deprecate@~1.0.1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"
Expand Down
23 changes: 23 additions & 0 deletions docs/how/updating-datahub.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,42 @@ This file documents any backwards-incompatible changes in DataHub and assists pe
## Next

### Breaking Changes
- #8810 - Removed support for SQLAlchemy 1.3.x. Only SQLAlchemy 1.4.x is supported now.

### Potential Downtime

### Deprecations

### Other Notable Changes

## 0.11.0

### Breaking Changes

### Potential Downtime
- #8611 Search improvements require reindexing indices. A `system-update` job will run, setting indices to read-only and creating a backup/clone of each index. During the reindexing, new components will be prevented from starting up until the reindex completes. The logs of this job indicate the percent complete per index. Depending on index sizes and infrastructure, this process can take anywhere from 5 minutes to several hours; as a rough estimate, expect about 1 hour for every 2.3 million entities.

### Deprecations
- #8525: In the LDAP ingestor, the `manager_pagination_enabled` option was renamed to the more general `pagination_enabled`.
- MAE Events are no longer produced. MAE events have been deprecated for over a year.

### Other Notable Changes
- In this release we now enable you to create and delete pinned announcements on your DataHub homepage! If you have the “Manage Home Page Posts” platform privilege you’ll see a new section in settings called “Home Page Posts” where you can create and delete text posts and link posts that your users see on the home page.
- The new search and browse experience, which was first made available in the previous release behind a feature flag, is now on by default. Check out our release notes for v0.10.5 to get more information and documentation on this new Browse experience.
- In addition to the ranking changes mentioned above, this release includes changes to the highlighting of search entities to understand why they match your query. You can also sort your results alphabetically or by last updated times, in addition to relevance. In this release, we suggest a correction if your query has a typo in it.
- #8300: The ClickHouse source is now inherited from TwoTierSQLAlchemy. Previously the hierarchy was platform_instance -> container -> container db (None) -> container schema; now it is platform_instance -> container database.
- #8300: Added the `uri_opts` argument; any options can now be passed to the ClickHouse client.
- #8659: BigQuery ingestion no longer creates DataPlatformInstance aspects by default.
This will only affect users that were depending on this aspect for custom functionality,
and can be enabled via the `include_data_platform_instance` config option.
- OpenAPI entity and aspect endpoints expanded to improve developer experience when using this API with additional aspects to be added in the near future.
- The CLI now supports recursive deletes.
- Batching of default aspects on initial ingestion (SQL)
- Improvements to multi-threading. Ingestion recipes, if previously reduced to 1 thread, can be restored to the 15 thread default.
- Gradle 7 upgrade moderately improves build speed
- DataHub Ingestion slim images reduced in size by 2GB+
- Glue Schema Registry fixed

## 0.10.5

Expand Down
6 changes: 3 additions & 3 deletions metadata-ingestion-modules/airflow-plugin/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,14 @@ task testFull(type: Exec, dependsOn: [testQuick, installDevTest]) {
commandLine 'bash', '-x', '-c',
"source ${venv_name}/bin/activate && pytest -m 'not slow_integration' -vv --continue-on-collection-errors --junit-xml=junit.full.xml"
}
task buildWheel(type: Exec, dependsOn: [install]) {
commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh'
}

task cleanPythonCache(type: Exec) {
commandLine 'bash', '-c',
"find src -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete"
}
task buildWheel(type: Exec, dependsOn: [install, cleanPythonCache]) {
commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh'
}

build.dependsOn install
check.dependsOn lint
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ MODULE=datahub_airflow_plugin
python -c 'import setuptools; where="./src"; assert setuptools.find_packages(where) == setuptools.find_namespace_packages(where), "you seem to be missing or have extra __init__.py files"'
if [[ ${RELEASE_VERSION:-} ]]; then
# Replace version with RELEASE_VERSION env variable
sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/${MODULE}/__init__.py
sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/${MODULE}/__init__.py
else
vim src/${MODULE}/__init__.py
fi
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Published at https://pypi.org/project/acryl-datahub/.
__package_name__ = "acryl-datahub-airflow-plugin"
__version__ = "0.0.0.dev0"
__version__ = "1!0.0.0.dev0"


def is_dev_mode() -> bool:
Expand Down
9 changes: 3 additions & 6 deletions metadata-ingestion/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ task installDev(type: Exec, dependsOn: [install]) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"${venv_name}/bin/pip install -e .[dev] ${extra_pip_requirements} && " +
"./scripts/install-sqlalchemy-stubs.sh && " +
"touch ${sentinel_file}"
}

Expand All @@ -82,7 +81,6 @@ task installAll(type: Exec, dependsOn: [install]) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"${venv_name}/bin/pip install -e .[all] ${extra_pip_requirements} && " +
"./scripts/install-sqlalchemy-stubs.sh && " +
"touch ${sentinel_file}"
}

Expand Down Expand Up @@ -119,7 +117,6 @@ task lint(type: Exec, dependsOn: installDev) {
task lintFix(type: Exec, dependsOn: installDev) {
commandLine 'bash', '-c',
"source ${venv_name}/bin/activate && set -x && " +
"./scripts/install-sqlalchemy-stubs.sh && " +
"black src/ tests/ examples/ && " +
"isort src/ tests/ examples/ && " +
"flake8 src/ tests/ examples/ && " +
Expand Down Expand Up @@ -188,16 +185,16 @@ task specGen(type: Exec, dependsOn: [codegen, installDevTest]) {
task docGen(type: Exec, dependsOn: [codegen, installDevTest, specGen]) {
commandLine 'bash', '-c', "source ${venv_name}/bin/activate && ./scripts/docgen.sh"
}
task buildWheel(type: Exec, dependsOn: [install, codegen]) {
commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh'
}



task cleanPythonCache(type: Exec) {
commandLine 'bash', '-c',
"find src tests -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete -o -type d -empty -delete"
}
task buildWheel(type: Exec, dependsOn: [install, codegen, cleanPythonCache]) {
commandLine 'bash', '-c', "source ${venv_name}/bin/activate && " + 'pip install build && RELEASE_VERSION="\${RELEASE_VERSION:-0.0.0.dev1}" RELEASE_SKIP_TEST=1 RELEASE_SKIP_UPLOAD=1 ./scripts/release.sh'
}

build.dependsOn install
check.dependsOn lint
Expand Down
6 changes: 3 additions & 3 deletions metadata-ingestion/docs/transformer/dataset_transformer.md
Original file line number Diff line number Diff line change
Expand Up @@ -909,7 +909,7 @@ in both of the cases domain should be provisioned on DataHub GMS
- Add domains, however replace existing domains sent by ingestion source
```yaml
transformers:
- type: "pattern_add_dataset_ownership"
- type: "pattern_add_dataset_domain"
config:
replace_existing: true # false is default behaviour
domain_pattern:
Expand All @@ -920,7 +920,7 @@ in both of the cases domain should be provisioned on DataHub GMS
- Add domains, however overwrite the domains available for the dataset on DataHub GMS
```yaml
transformers:
- type: "pattern_add_dataset_ownership"
- type: "pattern_add_dataset_domain"
config:
semantics: OVERWRITE # OVERWRITE is default behaviour
domain_pattern:
Expand All @@ -931,7 +931,7 @@ in both of the cases domain should be provisioned on DataHub GMS
- Add domains, however keep the domains available for the dataset on DataHub GMS
```yaml
transformers:
- type: "pattern_add_dataset_ownership"
- type: "pattern_add_dataset_domain"
config:
semantics: PATCH
domain_pattern:
Expand Down
28 changes: 0 additions & 28 deletions metadata-ingestion/scripts/install-sqlalchemy-stubs.sh

This file was deleted.

2 changes: 1 addition & 1 deletion metadata-ingestion/scripts/release.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ fi
python -c 'import setuptools; where="./src"; assert setuptools.find_packages(where) == setuptools.find_namespace_packages(where), "you seem to be missing or have extra __init__.py files"'
if [[ ${RELEASE_VERSION:-} ]]; then
# Replace version with RELEASE_VERSION env variable
sed -i.bak "s/__version__ = \"0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py
sed -i.bak "s/__version__ = \"1!0.0.0.dev0\"/__version__ = \"$RELEASE_VERSION\"/" src/datahub/__init__.py
else
vim src/datahub/__init__.py
fi
Expand Down
Loading

0 comments on commit 97645e6

Please sign in to comment.