-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtest_executions.py
119 lines (105 loc) · 4.08 KB
/
test_executions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import polars as pl
import pytest
from wimsey import execution
from wimsey import tests
def test_run_all_tests_produces_expected_result_object():
    """run_all_tests should report success overall and for every individual check."""
    suite = [
        tests.max_should(column="a", be_less_than=10),
        tests.std_should(column="a", be_greater_than=0),
        tests.type_should(column="b", be_one_of=["string", "int64"]),
    ]
    frame = pl.DataFrame({"a": [1, 2, 3], "b": ["hat", "bat", "cat"]})
    outcome = execution.run_all_tests(frame, suite)
    assert outcome.success is True
    # Every per-check result must also flag success, not just the aggregate.
    assert all(res.success is True for res in outcome.results)
def test_validate_carries_out_tests_then_returns_object_if_passing():
    """validate should hand the dataframe back (not raise) when all checks pass."""
    suite = [
        tests.max_should(column="a", be_less_than=10),
        tests.std_should(column="a", be_greater_than=0),
        tests.type_should(column="b", be_one_of=["string", "int64"]),
    ]
    frame = pl.DataFrame({"a": [1, 2, 3], "b": ["hat", "bat", "cat"]})
    returned = execution.validate(frame, suite)
    assert isinstance(returned, pl.DataFrame)
def test_validate_raises_error_if_tests_fail():
    """validate should raise DataValidationException when any check fails."""
    failing_suite = [
        # These bounds contradict the data on purpose (max 3 is not < 0, etc.).
        tests.max_should(column="a", be_less_than=0),
        tests.std_should(column="a", be_greater_than=10),
        tests.type_should(column="b", be_one_of=["string", "int64"]),
    ]
    frame = pl.DataFrame({"a": [1, 2, 3], "b": ["hat", "bat", "cat"]})
    with pytest.raises(execution.DataValidationException):
        execution.validate(frame, failing_suite)
def test_row_count_expectations_pass_when_expected():
    """All row-count comparison keywords should pass against a 3-row frame."""
    suite = [
        tests.row_count_should(
            be_less_than=3.1,
            be_less_than_or_equal_to=3,
            be_greater_than=-2.343,
            be_greater_than_or_equal_to=0.3,
            be_exactly=3,
        )
    ]
    frame = pl.DataFrame({"a": [1, 2, 3], "b": ["hat", "bat", "cat"]})
    outcome = execution.run_all_tests(frame, suite)
    assert outcome.success is True
    assert all(res.success is True for res in outcome.results)
def test_columns_should_have_expectations_fail_when_expected():
    """columns_should must fail when the frame's columns violate the spec."""
    suite = [
        # Frame has columns a/b, so requiring "c", banning "a", and pinning
        # the set to ["b", "c"] should all be violated.
        tests.columns_should(
            have="c",
            not_have="a",
            be=["b", "c"],
        )
    ]
    frame = pl.DataFrame({"a": [1, 2, 3], "b": ["hat", "bat", "cat"]})
    outcome = execution.run_all_tests(frame, suite)
    assert outcome.success is False
def test_column_type_tests_pass_when_expected():
    """type_should variants (be / be_one_of / not_be) should all pass here."""
    suite = [
        tests.type_should(column="a", be="int64"),
        tests.type_should(column="a", be_one_of=["int64", "float64"]),
        tests.type_should(column="a", not_be="float64"),
        tests.type_should(column="b", be="string"),
    ]
    frame = pl.DataFrame({"a": [1, 2, 3], "b": ["hat", "bat", "cat"]})
    outcome = execution.run_all_tests(frame, suite)
    assert outcome.success is True
    assert all(res.success is True for res in outcome.results)
def test_average_column_difference_tests_pass_when_expected():
    """Mean column difference of identical columns is 0; all bounds should pass."""
    suite = [
        tests.average_difference_from_other_column_should(
            column="a",
            other_column="b",
            be_exactly=0,
            be_less_than=2,
            be_greater_than=-1,
            be_less_than_or_equal_to=0,
            be_greater_than_or_equal_to=0,
        ),
    ]
    # Columns are identical, so every pairwise difference is zero.
    frame = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    outcome = execution.run_all_tests(frame, suite)
    assert outcome.success is True
    assert all(res.success is True for res in outcome.results)
def test_average_column_ratio_tests_pass_when_expected():
    """Mean ratio of b to a is exactly 2 here; all ratio bounds should pass."""
    suite = [
        tests.average_ratio_to_other_column_should(
            column="b",
            other_column="a",
            be_exactly=2,
            be_less_than=3,
            be_greater_than=1,
            be_less_than_or_equal_to=2,
            be_greater_than_or_equal_to=2,
        ),
    ]
    # b is exactly 2 * a row-wise, so every ratio is 2.
    frame = pl.DataFrame({"a": [1, 2, 3], "b": [2, 4, 6]})
    outcome = execution.run_all_tests(frame, suite)
    assert outcome.success is True
    assert all(res.success is True for res in outcome.results)