Skip to content

Commit

Permalink
feat: automatically clean up staging datasets when using insert-from-…
Browse files Browse the repository at this point in the history
…staging
  • Loading branch information
z3z1ma committed Apr 16, 2024
1 parent 53f4a9f commit 3c72a8a
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 1 deletion.
2 changes: 2 additions & 0 deletions examples/sandbox/alex/cdf.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
default:
name: alex
destination:
replace_strategy: insert-from-staging
pipelines:
# The pipeline name is based on the dict key by default, metadata follows in the body
us_cities:
Expand Down
14 changes: 13 additions & 1 deletion src/cdf/core/runtime/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import fnmatch
import os
import typing as t
from contextlib import contextmanager, nullcontext, redirect_stdout
from contextlib import contextmanager, nullcontext, redirect_stdout, suppress
from contextvars import ContextVar, copy_context

import dlt
Expand Down Expand Up @@ -340,6 +340,7 @@ def execute_pipeline_specification(
metrics=spec.runtime_metrics,
):
context_snapshot = copy_context()
pipe_ref = context_snapshot.run(pipeline_factory)
null = open(os.devnull, "w")
maybe_redirect = redirect_stdout(null) if quiet else nullcontext()
if intercept_sources:
Expand All @@ -355,6 +356,17 @@ def execute_pipeline_specification(
try:
with maybe_redirect:
exports = context_snapshot.run(spec.main)
with (
suppress(KeyError),
pipe_ref.sql_client() as client,
client.with_staging_dataset(staging=True) as staging_client,
):
strategy = dlt.config["destination.replace_strategy"]
if strategy in ("insert-from-staging",) and staging_client.has_dataset():
logger.info(
f"Cleaning up staging dataset {staging_client.dataset_name}"
)
staging_client.drop_dataset()
return M.ok(exports)
except Exception as e:
return M.error(e)
Expand Down

0 comments on commit 3c72a8a

Please sign in to comment.