Skip to content

Commit

Permalink
finish updates
Browse files: browse the repository at this point in the history
  • Loading branch information
atl1502 committed Feb 12, 2024
1 parent e6e20cc commit bcf6b26
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 5 deletions.
8 changes: 8 additions & 0 deletions dataprofiler/profilers/numerical_column_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -1924,6 +1924,10 @@ def _get_skewness(
):
return

if self._greater_than_64_bit and type(df_series) is pd.Series:
df_series = df_series.to_numpy(dtype=float)
else:
df_series = pl.from_pandas(df_series, nan_to_null=False)
batch_biased_skewness = profiler_utils.biased_skew(df_series)
subset_properties["biased_skewness"] = batch_biased_skewness
batch_count = subset_properties["match_count"]
Expand Down Expand Up @@ -1968,6 +1972,10 @@ def _get_kurtosis(
):
return

if self._greater_than_64_bit and type(df_series) is pd.Series:
df_series = df_series.to_numpy(dtype=float)
else:
df_series = pl.from_pandas(df_series, nan_to_null=False)
batch_biased_kurtosis = profiler_utils.biased_kurt(df_series)
subset_properties["biased_kurtosis"] = batch_biased_kurtosis
batch_count = subset_properties["match_count"]
Expand Down
11 changes: 6 additions & 5 deletions dataprofiler/profilers/profiler_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
import polars as pl
import psutil
import scipy
from pandas import DataFrame, Series
from pandas import DataFrame
from polars import Series

from ..labelers.data_labelers import DataLabeler

Expand Down Expand Up @@ -321,7 +322,7 @@ def add_nested_dictionaries(first_dict: dict, second_dict: dict) -> dict:
return merged_dict


def biased_skew(df_series: Series) -> np.float64:
def biased_skew(df_series: Series | np.ndarray) -> np.float64:
"""
Calculate the biased estimator for skewness of the given data.
Expand All @@ -332,7 +333,6 @@ def biased_skew(df_series: Series) -> np.float64:
:return: biased skewness
:rtype: np.float64
"""
df_series = pl.from_pandas(df_series, nan_to_null=False)
n = len(df_series)
if n < 1:
return np.float64(np.nan)
Expand Down Expand Up @@ -360,7 +360,7 @@ def biased_skew(df_series: Series) -> np.float64:
return skew


def biased_kurt(df_series: Series) -> np.float64:
def biased_kurt(df_series: Series | np.ndarray) -> np.float64:
"""
Calculate the biased estimator for kurtosis of the given data.
Expand All @@ -371,7 +371,6 @@ def biased_kurt(df_series: Series) -> np.float64:
:return: biased kurtosis
:rtype: np.float64
"""
df_series = pl.from_pandas(df_series, nan_to_null=False)
n = len(df_series)
if n < 1:
return np.float64(np.nan)
Expand Down Expand Up @@ -678,6 +677,8 @@ def get_memory_size(data: list | np.ndarray | DataFrame, unit: str = "M") -> flo
:type unit: string
:return: memory size of the input data
"""
if type(data) is DataFrame:
data = pl.from_pandas(data)
unit_map: dict = collections.defaultdict(B=0, K=1, M=2, G=3)
if unit not in unit_map:
raise ValueError(
Expand Down

0 comments on commit bcf6b26

Please sign in to comment.