[
{
"name": "Great Expectations",
"ring": "trial",
"quadrant": "tools",
"isNew": "FALSE",
"description": "Great Expectations (GX) helps data teams build a shared understanding of their data through quality testing, documentation, and profiling."
},
{
"name": "Deequ",
"ring": "trial",
"quadrant": "tools",
"isNew": "FALSE",
    "description": "Deequ is a library built on top of Apache Spark for defining unit tests for data, which measure data quality in large datasets."
},
{
"name": "PyDeequ",
"ring": "trial",
"quadrant": "tools",
"isNew": "FALSE",
"description": "PyDeequ is a Python API for Deequ, a library built on top of Apache Spark for defining unit tests for data, which measure data quality in large datasets. PyDeequ is written to support usage of Deequ in Python."
},
{
"name": "DQC",
"ring": "hold",
"quadrant": "tools",
"isNew": "FALSE",
"description": "test"
},
{
"name": "Soda Core",
"ring": "assess",
"quadrant": "tools",
"isNew": "FALSE",
"description": "An open-source data quality and observability tool."
},
{
"name": "dbt-unit-testing",
"ring": "trial",
"quadrant": "languages & frameworks",
"isNew": "FALSE",
"description": "A dbt package that allows writing unit tests for a model and its logic by mocking its dependencies. This brings the engineering rigor of fast development feedback to the data ecosystem. Our teams use this package with Snowflake to practice test-driven development (TDD), although it was only feasible for simple transformations."
},
{
"name": "dbt-expectations",
"ring": "trial",
"quadrant": "languages & frameworks",
"isNew": "FALSE",
"description": "An extension package for dbt inspired by Great Expectations. Data quality is an important tenet of data governance, so when it comes to automated data governance, it's important to craft built-in controls that flag anomalies or quality issues in data pipelines."
},
{
"name": "pandera",
"ring": "assess",
"quadrant": "languages & frameworks",
"isNew": "FALSE",
    "description": "A Python library for testing and validating data across a wide range of dataframe types such as pandas, Dask or PySpark."
}
]