Skip to content

Commit

Permalink
fully finished
Browse files Browse the repository at this point in the history
  • Loading branch information
atl1502 committed Mar 3, 2024
1 parent a06a2a0 commit 7d9e655
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 111 deletions.
17 changes: 8 additions & 9 deletions dataprofiler/profilers/float_column_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ def _get_float_precision(
:param df_series_clean: df series with nulls removed, assumes all values
are floats as well
:type df_series_clean: pandas.core.series.Series
:type df_series_clean: polars.series.series.Series
:param sample_ratio: Ratio of samples used for float precision
:type sample_ratio: float (between 0 and 1)
:return: string representing its precision print format
Expand Down Expand Up @@ -332,9 +332,9 @@ def _is_each_row_float(cls, df_series: pl.Series) -> pl.Series:
For column [1.0, np.NaN, 1.0] returns [True, True, True]
For column [1.0, "a", "b"] returns [True, False, False]
:param df_series: series of values to evaluate
:type df_series: pandas.core.series.Series
:type df_series: polars.series.series.Series
:return: is_float_col
:rtype: Union[List[bool], pandas.Series[bool]]
:rtype: pl.Series
"""
if len(df_series) == 0:
return pl.Series()
Expand All @@ -361,7 +361,7 @@ def _update_precision(
subset before they are merged into the main data profile.
:type subset_properties: dict
:param df_series: Data to be profiled
:type df_series: pandas.DataFrame
:type df_series: polars.DataFrame
:return: None
"""
sample_ratio = None
Expand Down Expand Up @@ -403,19 +403,18 @@ def _update_helper(self, df_series_clean: pl.Series, profile: dict) -> None:
Update column profile properties with cleaned dataset and its known profile.
:param df_series_clean: df series with nulls removed
:type df_series_clean: pandas.core.series.Series
:type df_series_clean: polars.series.series.Series
:param profile: float profile dictionary
:type profile: dict
:return: None
"""
df_series_clean = df_series_clean.to_pandas()
if self._NumericStatsMixin__calculations:
NumericStatsMixin._update_helper(self, df_series_clean, profile)
self._update_column_base_properties(profile)

def _update_numeric_stats(
self,
df_series: pl.DataFrame,
df_series: pl.Series,
prev_dependent_properties: dict,
subset_properties: dict,
) -> None:
Expand All @@ -430,7 +429,7 @@ def _update_numeric_stats(
subset before they are merged into the main data profile.
:type subset_properties: Dict
:param df_series: Data to be profiled
:type df_series: Pandas Dataframe
:type df_series: polars.series.series.Series
:return: None
"""
super()._update_helper(df_series, subset_properties)
Expand All @@ -440,7 +439,7 @@ def update(self, df_series: pl.Series) -> FloatColumn:
Update the column profile.
:param df_series: df series
:type df_series: pandas.core.series.Series
:type df_series: polars.series.series.Series
:return: updated FloatColumn
:rtype: FloatColumn
"""
Expand Down
8 changes: 3 additions & 5 deletions dataprofiler/profilers/int_column_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from __future__ import annotations

import numpy as np
import pandas as pd
import polars as pl

from .base_column_profilers import BaseColumnPrimitiveTypeProfiler, BaseColumnProfiler
Expand Down Expand Up @@ -125,7 +124,7 @@ def _is_each_row_int(cls, df_series: pl.Series) -> list[bool]:
For column [1.1 1.1 1.1] returns False
:param df_series: series of values to evaluate
:type df_series: pandas.core.series.Series
:type df_series: polars.series.series.Series
:return: is_int_col
:rtype: list
"""
Expand All @@ -140,12 +139,11 @@ def _update_helper(self, df_series_clean: pl.Series, profile: dict) -> None:
Update col profile properties with clean dataset and its known null params.
:param df_series_clean: df series with nulls removed
:type df_series_clean: pandas.core.series.Series
:type df_series_clean: polars.series.series.Series
:param profile: int profile dictionary
:type profile: dict
:return: None
"""
df_series_clean = pd.Series(df_series_clean.to_numpy())
if self._NumericStatsMixin__calculations:
NumericStatsMixin._update_helper(self, df_series_clean, profile)
self._update_column_base_properties(profile)
Expand All @@ -155,7 +153,7 @@ def update(self, df_series: pl.Series) -> IntColumn:
Update the column profile.
:param df_series: df series
:type df_series: pandas.core.series.Series
:type df_series: polars.series.series.Series
:return: updated IntColumn
:rtype: IntColumn
"""
Expand Down
Loading

0 comments on commit 7d9e655

Please sign in to comment.