Skip to content
This repository has been archived by the owner on May 17, 2024. It is now read-only.

Commit

Permalink
Merge pull request #638 from datafold/valentin-dx-842-add-column-type…
Browse files Browse the repository at this point in the history
…s-to-json-output

Add column types to json output
  • Loading branch information
dlawin authored Jul 16, 2023
2 parents 0d65830 + 80647b5 commit d791017
Show file tree
Hide file tree
Showing 3 changed files with 192 additions and 19 deletions.
12 changes: 11 additions & 1 deletion data_diff/dbt.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,13 +308,23 @@ def _local_diff(diff_vars: TDiffVars, json_output: bool = False) -> None:
)
return

dataset1_columns = [
(name, type_, table1.database.dialect.parse_type(table1.table_path, name, type_, *other))
for (name, type_, *other) in table1_columns.values()
]
dataset2_columns = [
(name, type_, table2.database.dialect.parse_type(table2.table_path, name, type_, *other))
for (name, type_, *other) in table2_columns.values()
]
print(
json.dumps(
jsonify(
diff,
dbt_model=diff_vars.dbt_model,
dataset1_columns=dataset1_columns,
dataset2_columns=dataset2_columns,
with_summary=True,
with_columns={
columns_diff={
"added": columns_added,
"removed": columns_removed,
"changed": columns_type_changed,
Expand Down
86 changes: 77 additions & 9 deletions data_diff/format.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,23 @@
import collections
from typing import Any, Optional, List, Dict, Tuple
from enum import Enum
from typing import Any, Optional, List, Dict, Tuple, Type

from runtype import dataclass
from data_diff.diff_tables import DiffResultWrapper
from data_diff.sqeleton.abcs.database_types import (
JSON,
Boolean,
ColType,
Array,
ColType_UUID,
Date,
FractionalType,
NumericType,
Struct,
TemporalType,
ColType_Alphanum,
String_Alphanum,
)


def jsonify_error(table1: List[str], table2: List[str], dbt_model: str, error: str) -> "FailedDiff":
Expand All @@ -15,11 +30,16 @@ def jsonify_error(table1: List[str], table2: List[str], dbt_model: str, error: s
).json()


# Column descriptors accepted by `jsonify`: one (column name, type string, parsed ColType instance) tuple per column.
Columns = List[Tuple[str, str, ColType]]


def jsonify(
diff: DiffResultWrapper,
dbt_model: str,
dataset1_columns: Columns,
dataset2_columns: Columns,
columns_diff: Dict[str, List[str]],
with_summary: bool = False,
with_columns: Optional[Dict[str, List[str]]] = None,
) -> "JsonDiff":
"""
Converts the diff result into a JSON-serializable format.
Expand Down Expand Up @@ -53,16 +73,13 @@ def jsonify(
if with_summary:
summary = _jsonify_diff_summary(diff.get_stats_dict(is_dbt=True))

columns = None
if with_columns:
columns = _jsonify_columns_diff(with_columns, list(key_columns))
columns = _jsonify_columns_diff(dataset1_columns, dataset2_columns, columns_diff, list(key_columns))

is_different = bool(
t1_exclusive_rows
or t2_exclusive_rows
or diff_rows
or with_columns
and (with_columns["added"] or with_columns["removed"] or with_columns["changed"])
or (columns_diff["added"] or columns_diff["removed"] or columns_diff["changed"])
)
return JsonDiff(
status="success",
Expand Down Expand Up @@ -138,8 +155,44 @@ class ExclusiveColumns:
dataset2: List[str]


class ColumnKind(Enum):
    """Coarse column categories; the member value is the string written as a column's ``kind`` in the JSON output."""

    INTEGER = "integer"
    FLOAT = "float"
    STRING = "string"
    DATE = "date"
    TIME = "time"
    DATETIME = "datetime"
    BOOL = "boolean"
    UNSUPPORTED = "unsupported"


# Ordered lookup table from ColType classes to the ColumnKind reported in the JSON output.
# `_map_kind` walks this list and returns the kind of the FIRST entry the value is an
# instance of, so order is significant: an earlier entry shadows any later entry that
# would also match.
# NOTE(review): the trailing `ColType` entry presumably acts as a catch-all because
# ColType is the common base class — confirm against database_types.
KIND_MAPPING: List[Tuple[Type[ColType], ColumnKind]] = [
    (Boolean, ColumnKind.BOOL),
    (Date, ColumnKind.DATE),
    (TemporalType, ColumnKind.DATETIME),
    (FractionalType, ColumnKind.FLOAT),
    (NumericType, ColumnKind.INTEGER),
    (ColType_UUID, ColumnKind.STRING),
    (ColType_Alphanum, ColumnKind.STRING),
    (String_Alphanum, ColumnKind.STRING),
    (JSON, ColumnKind.STRING),
    (Array, ColumnKind.STRING),
    (Struct, ColumnKind.STRING),
    (ColType, ColumnKind.UNSUPPORTED),
]


@dataclass
class Column:
    """Description of a single dataset column in the JSON output."""

    name: str  # column name
    type: str  # type string, passed through unchanged from the input column tuple
    kind: str  # value of the ColumnKind that `_map_kind` selects for the column's parsed type


@dataclass
class JsonColumnsSummary:
    """Column-level section of the JSON diff (the ``columns`` field of ``JsonDiff``)."""

    dataset1: List[Column]  # column descriptions built from `dataset1_columns`
    dataset2: List[Column]  # column descriptions built from `dataset2_columns`
    primaryKey: List[str]  # key column names
    exclusive: ExclusiveColumns  # "removed" columns go to .dataset1, "added" columns to .dataset2
    typeChanged: List[str]  # the "changed" list of the columns diff
Expand Down Expand Up @@ -179,7 +232,7 @@ class JsonDiff:
summary: Optional[JsonDiffSummary]
columns: Optional[JsonColumnsSummary]

version: str = "1.0.0"
version: str = "1.1.0"


def _group_rows(
Expand Down Expand Up @@ -262,12 +315,27 @@ def _jsonify_diff_summary(stats_dict: dict) -> JsonDiffSummary:
)


def _jsonify_columns_diff(columns_diff: Dict[str, List[str]], key_columns: List[str]) -> JsonColumnsSummary:
def _jsonify_columns_diff(
    dataset1_columns: Columns, dataset2_columns: Columns, columns_diff: Dict[str, List[str]], key_columns: List[str]
) -> JsonColumnsSummary:
    """
    Assemble the column-level section of the JSON output.

    Each ``(name, type, parsed type)`` tuple of either dataset becomes a ``Column``
    whose ``kind`` is the value of the ``ColumnKind`` chosen by ``_map_kind``.
    ``columns_diff`` is read under the keys "added", "removed" and "changed";
    a missing key is treated as an empty list.
    """
    first_dataset = []
    for col_name, raw_type, parsed_type in dataset1_columns:
        first_dataset.append(Column(name=col_name, type=raw_type, kind=_map_kind(parsed_type).value))

    second_dataset = []
    for col_name, raw_type, parsed_type in dataset2_columns:
        second_dataset.append(Column(name=col_name, type=raw_type, kind=_map_kind(parsed_type).value))

    # "added" columns exist only in dataset2, "removed" ones only in dataset1.
    only_in_second = list(columns_diff.get("added", []))
    only_in_first = list(columns_diff.get("removed", []))
    exclusive_columns = ExclusiveColumns(dataset2=only_in_second, dataset1=only_in_first)

    type_changed = list(columns_diff.get("changed", []))

    return JsonColumnsSummary(
        dataset1=first_dataset,
        dataset2=second_dataset,
        primaryKey=key_columns,
        exclusive=exclusive_columns,
        typeChanged=type_changed,
    )


def _map_kind(kind: ColType) -> ColumnKind:
    """
    Return the ``ColumnKind`` of the first ``KIND_MAPPING`` entry that ``kind`` is an instance of.

    Falls back to ``ColumnKind.UNSUPPORTED`` when no entry matches.
    """
    matching_kinds = (column_kind for col_type, column_kind in KIND_MAPPING if isinstance(kind, col_type))
    return next(matching_kinds, ColumnKind.UNSUPPORTED)
113 changes: 104 additions & 9 deletions tests/test_format.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest
from data_diff.diff_tables import DiffResultWrapper, InfoTree, SegmentInfo, TableSegment
from data_diff.format import jsonify
from data_diff.sqeleton.abcs.database_types import Integer
from data_diff.sqeleton.databases import Database


Expand Down Expand Up @@ -35,11 +36,28 @@ def test_jsonify_diff(self):
diff=[],
stats={},
)
json_diff = jsonify(diff, dbt_model="my_model")
json_diff = jsonify(
diff,
dbt_model="my_model",
dataset1_columns=[
("id", "NUMBER", Integer()),
("value", "NUMBER", Integer()),
],
dataset2_columns=[
("id", "NUMBER", Integer()),
("value", "NUMBER", Integer()),
],
columns_diff={
"added": [],
"removed": [],
"typeChanged": [],
},
)

self.assertEqual(
json_diff,
{
"version": "1.0.0",
"version": "1.1.0",
"status": "success",
"result": "different",
"model": "my_model",
Expand All @@ -57,8 +75,23 @@ def test_jsonify_diff(self):
},
],
},
"columns": {
"dataset1": [
{"name": "id", "type": "NUMBER", "kind": "integer"},
{"name": "value", "type": "NUMBER", "kind": "integer"},
],
"dataset2": [
{"name": "id", "type": "NUMBER", "kind": "integer"},
{"name": "value", "type": "NUMBER", "kind": "integer"},
],
"primaryKey": ["id"],
"exclusive": {
"dataset1": [],
"dataset2": [],
},
"typeChanged": [],
},
"summary": None,
"columns": None,
},
)

Expand Down Expand Up @@ -86,11 +119,27 @@ def test_jsonify_diff_no_difeference(self):
diff=[],
stats={},
)
json_diff = jsonify(diff, dbt_model="model")
json_diff = jsonify(
diff,
dbt_model="model",
dataset1_columns=[
("id", "NUMBER", Integer()),
("value", "NUMBER", Integer()),
],
dataset2_columns=[
("id", "NUMBER", Integer()),
("value", "NUMBER", Integer()),
],
columns_diff={
"added": [],
"removed": [],
"changed": [],
},
)
self.assertEqual(
json_diff,
{
"version": "1.0.0",
"version": "1.1.0",
"status": "success",
"result": "identical",
"model": "model",
Expand All @@ -100,8 +149,23 @@ def test_jsonify_diff_no_difeference(self):
"exclusive": {"dataset1": [], "dataset2": []},
"diff": [],
},
"columns": {
"primaryKey": ["id"],
"dataset1": [
{"name": "id", "type": "NUMBER", "kind": "integer"},
{"name": "value", "type": "NUMBER", "kind": "integer"},
],
"dataset2": [
{"name": "id", "type": "NUMBER", "kind": "integer"},
{"name": "value", "type": "NUMBER", "kind": "integer"},
],
"exclusive": {
"dataset1": [],
"dataset2": [],
},
"typeChanged": [],
},
"summary": None,
"columns": None,
},
)

Expand Down Expand Up @@ -133,11 +197,27 @@ def test_jsonify_column_suffix_fix(self):
diff=[],
stats={},
)
json_diff = jsonify(diff, dbt_model="my_model")
json_diff = jsonify(
diff,
dbt_model="my_model",
dataset1_columns=[
("id_a", "NUMBER", Integer()),
("value_b", "NUMBER", Integer()),
],
dataset2_columns=[
("id_a", "NUMBER", Integer()),
("value_b", "NUMBER", Integer()),
],
columns_diff={
"added": [],
"removed": [],
"typeChanged": [],
},
)
self.assertEqual(
json_diff,
{
"version": "1.0.0",
"version": "1.1.0",
"status": "success",
"result": "different",
"model": "my_model",
Expand All @@ -158,6 +238,21 @@ def test_jsonify_column_suffix_fix(self):
],
},
"summary": None,
"columns": None,
"columns": {
"dataset1": [
{"name": "id_a", "type": "NUMBER", "kind": "integer"},
{"name": "value_b", "type": "NUMBER", "kind": "integer"},
],
"dataset2": [
{"name": "id_a", "type": "NUMBER", "kind": "integer"},
{"name": "value_b", "type": "NUMBER", "kind": "integer"},
],
"primaryKey": ["id_a"],
"exclusive": {
"dataset1": [],
"dataset2": [],
},
"typeChanged": [],
},
},
)

0 comments on commit d791017

Please sign in to comment.