Skip to content

Commit

Permalink
Staging into main from dev (#1106)
Browse files Browse the repository at this point in the history
* add downloads tile (#1085)

* Hot fix json bug (#1105)

* update

* update
  • Loading branch information
taylorfturner authored Mar 5, 2024
1 parent a92ab1e commit 1287027
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 2 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
![PyPI - Python Version](https://img.shields.io/pypi/pyversions/DataProfiler)
![GitHub](https://img.shields.io/github/license/CapitalOne/DataProfiler)
![GitHub last commit](https://img.shields.io/github/last-commit/CapitalOne/DataProfiler)
[![Downloads](https://static.pepy.tech/badge/dataprofiler)](https://pepy.tech/project/dataprofiler)

<p align="left">
<picture>
Expand Down
3 changes: 2 additions & 1 deletion dataprofiler/profilers/json_encoder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Contains ProfilerEncoder class."""

import json
from datetime import datetime

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -52,7 +53,7 @@ def default(self, to_serialize):
return int(to_serialize)
elif isinstance(to_serialize, np.ndarray):
return to_serialize.tolist()
elif isinstance(to_serialize, pd.Timestamp):
elif isinstance(to_serialize, (pd.Timestamp, datetime)):
return to_serialize.isoformat()
elif isinstance(to_serialize, BaseDataLabeler):
# TODO: This does not allow the user to serialize a model if it is loaded
Expand Down
3 changes: 2 additions & 1 deletion dataprofiler/tests/labelers/test_labeler_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,9 @@ def test_verbose(self):
self.assertIn("f1-score ", log_output)
self.assertIn("F1 Score: ", log_output)

@mock.patch("dataprofiler.labelers.labeler_utils.classification_report")
@mock.patch("pandas.DataFrame")
def test_save_conf_mat(self, mock_dataframe):
def test_save_conf_mat(self, mock_dataframe, mock_report):

# ideally mock out the actual contents written to file, but
# would be difficult to get this completely worked out.
Expand Down
41 changes: 41 additions & 0 deletions dataprofiler/tests/profilers/test_datetime_column_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,47 @@ def test_json_encode_after_update(self):

self.assertEqual(serialized, expected)

def test_json_encode_datetime(self):
    """Check the JSON produced for a DateTimeColumn after a single update.

    The profile is updated with one string value while
    ``_combine_unique_sets`` and ``time.time`` are patched, then encoded
    with ``ProfileEncoder``. The expected payload carries ``_dt_obj_min``
    and ``_dt_obj_max`` as ISO-8601 strings.
    """
    raw_value = "1209214"
    iso_value = "9214-01-20T00:00:00"
    date_formats = [
        "%Y-%m-%d %H:%M:%S",
        "%b %d, %Y",
        "%m/%d/%y %H:%M",
    ]

    series = pd.Series([raw_value])
    profiler = DateTimeColumn("0")

    # Pin the reported date formats and the clock so the serialized
    # "date_formats" and "times" entries are deterministic.
    with patch.object(
        profiler, "_combine_unique_sets", return_value=date_formats
    ), patch("time.time", return_value=0.0):
        profiler.update(series)

    actual_json = json.dumps(profiler, cls=ProfileEncoder)

    # Key order matters: the comparison below is on the encoded strings.
    expected_payload = {
        "class": "DateTimeColumn",
        "data": {
            "name": "0",
            "col_index": np.nan,
            "sample_size": 1,
            "metadata": {},
            "times": defaultdict(float, {"datetime": 0.0}),
            "thread_safe": True,
            "match_count": 1,
            "date_formats": date_formats,
            "min": raw_value,
            "max": raw_value,
            "_dt_obj_min": iso_value,
            "_dt_obj_max": iso_value,
            "_DateTimeColumn__calculations": {},
        },
    }
    expected_json = json.dumps(expected_payload)

    self.assertEqual(actual_json, expected_json)

def test_json_decode(self):
fake_profile_name = None
expected_profile = DateTimeColumn(fake_profile_name)
Expand Down

0 comments on commit 1287027

Please sign in to comment.