# Updated demos (#169)
## Changes
Make the results of the quality checks in the demos more interesting by showing multiple errors for a single row (see the sketch after the test checklist).

### Tests

- [x] manually tested
- [ ] added unit tests
- [ ] added integration tests
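A minimal sketch of the pattern the updated demo now exercises, not the committed code itself: checks with mixed criticalities applied through `DQEngine.apply_checks`, with the input rows arranged so that a single row fails several checks at once. The import paths, the simplified checks list, and the Databricks notebook globals `spark` and `display` are assumptions; the actual rules added by this commit are the ones shown in the diff below.

```python
# Hedged sketch only: the import paths are assumptions, not taken from this commit.
from databricks.sdk import WorkspaceClient
from databricks.labs.dqx.engine import DQEngine
from databricks.labs.dqx.rule import DQRule
from databricks.labs.dqx.col_functions import is_not_null_and_not_empty, value_is_in_list

# `spark` and `display` are assumed to be the usual Databricks notebook globals.
schema = "col1: int, col2: int, col3: int, col4: int"
input_df = spark.createDataFrame([[1, 3, 3, None], [3, None, 4, 1]], schema)

checks = [
    # Second row: col2 is None, so this rule reports an error for it.
    DQRule(criticality="error", check=is_not_null_and_not_empty("col2")),
    # First row: col4 is None, so this rule reports a warning for it.
    DQRule(criticality="warn", check=is_not_null_and_not_empty("col4")),
    # Second row again: col1 = 3 is outside the allowed list, so the same row
    # picks up a second error, which is the point of this change.
    DQRule(criticality="error", check=value_is_in_list("col1", ["1", "2"])),
]

dq_engine = DQEngine(WorkspaceClient())
valid_and_quarantined_df = dq_engine.apply_checks(input_df, checks)
display(valid_and_quarantined_df)
```

With this data the output should contain both warnings and errors, and the second input row should be reported with two errors rather than one.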
mwojtyczka authored Feb 13, 2025
1 parent dc94af3 commit de11239
Showing 1 changed file with 20 additions and 21 deletions.
41 changes: 20 additions & 21 deletions demos/dqx_demo_library.py
@@ -136,24 +136,24 @@
- col1
- col2
-- criticality: error
+- criticality: warn
check:
function: is_not_null_and_not_empty
arguments:
col_name: col3
-- criticality: error
+- criticality: warn
filter: col1 < 3
check:
function: is_not_null_and_not_empty
arguments:
col_name: col4
-- criticality: warn
+- criticality: error
check:
function: value_is_in_list
arguments:
-col_name: col4
+col_name: col1
allowed:
- 1
- 2
@@ -164,7 +164,7 @@
assert not status.has_errors

schema = "col1: int, col2: int, col3: int, col4 int"
-input_df = spark.createDataFrame([[1, 3, 3, 1], [2, None, 4, 1]], schema)
+input_df = spark.createDataFrame([[1, 3, 3, None], [3, None, 4, 1]], schema)

dq_engine = DQEngine(WorkspaceClient())

@@ -194,20 +194,20 @@
check_func=is_not_null).get_rules() + [
DQRule( # define rule for a single column
name="col3_is_null_or_empty",
criticality="error",
criticality="warn",
check=is_not_null_and_not_empty("col3")),
DQRule( # define rule with a filter
name="col_4_is_null_or_empty",
criticality="error",
criticality="warn",
filter="col1 < 3",
check=is_not_null_and_not_empty("col4")),
DQRule( # name auto-generated if not provided
criticality="warn",
check=value_is_in_list("col4", ["1", "2"]))
criticality="error",
check=value_is_in_list("col1", ["1", "2"]))
]

schema = "col1: int, col2: int, col3: int, col4 int"
-input_df = spark.createDataFrame([[1, 3, 3, 1], [2, None, 4, 1]], schema)
+input_df = spark.createDataFrame([[1, 3, 3, None], [3, None, 4, 1]], schema)

dq_engine = DQEngine(WorkspaceClient())

@@ -336,7 +336,7 @@ def ends_with_foo(col_name: str) -> Column:
function: is_not_null_and_not_empty
arguments:
col_name: col1
-- criticality: error
+- criticality: warn
check:
function: ends_with_foo
arguments:
@@ -350,8 +350,8 @@ def ends_with_foo(col_name: str) -> Column:
"""
)

schema = "col1: string"
input_df = spark.createDataFrame([["str1"], ["foo"], ["str3"]], schema)
schema = "col1: string, col2: string"
input_df = spark.createDataFrame([[None, "foo"], ["foo", None], [None, None]], schema)

dq_engine = DQEngine(WorkspaceClient())

@@ -380,14 +380,13 @@ def ends_with_foo(col_name: str) -> Column:
ws = WorkspaceClient()
dq_engine = DQEngine(ws, extra_params=extra_parameters)

schema = "col1: string"
input_df = spark.createDataFrame([["str1"], ["foo"], ["str3"]], schema)
schema = "col1: string, col2: string"
input_df = spark.createDataFrame([[None, "foo"], ["foo", None], [None, None]], schema)

-checks = [ DQRule(
-name="col_1_is_null_or_empty",
-criticality="error",
-check=is_not_null_and_not_empty("col1")),
-]
+checks = [
+DQRule(criticality="error", check=is_not_null_and_not_empty("col1")),
+DQRule(criticality="warn", check=is_not_null_and_not_empty("col2")),
+]

valid_and_quarantined_df = dq_engine.apply_checks(input_df, checks)
-display(valid_and_quarantined_df)
+display(valid_and_quarantined_df)
