Skip to content

Commit

Permalink
fix: fix correlation issues whenever strings are present in the dataset.
Browse files Browse the repository at this point in the history
  • Loading branch information
Fabiana Clemente authored and fabclmnt committed Jul 13, 2024
1 parent 9027235 commit 1c94d33
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions src/ydata_profiling/model/pandas/correlations_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,21 +26,24 @@
def pandas_spearman_compute(
    config: Settings, df: pd.DataFrame, summary: dict
) -> Optional[pd.DataFrame]:
    """Compute the pairwise Spearman correlation matrix of the numeric columns.

    Only columns selected by ``select_dtypes(include="number")`` take part,
    which keeps string columns out of the correlation.

    Args:
        config: Report settings; not used by this implementation.
        df: The DataFrame whose columns are correlated.
        summary: Variable summaries; not used by this implementation.

    Returns:
        The correlation matrix produced by ``DataFrame.corr`` for the
        numeric columns of ``df``.
    """
    # Filter first: calling ``df.corr`` on the raw frame is exactly what the
    # fix replaces.  ``corr`` only reads its input, so the filtered frame is
    # used directly without an extra ``.copy()``.
    numeric_df = df.select_dtypes(include="number")
    return numeric_df.corr(method="spearman")


@Pearson.compute.register(Settings, pd.DataFrame, dict)
def pandas_pearson_compute(
    config: Settings, df: pd.DataFrame, summary: dict
) -> Optional[pd.DataFrame]:
    """Compute the pairwise Pearson correlation matrix of the numeric columns.

    Only columns selected by ``select_dtypes(include="number")`` take part,
    which keeps string columns out of the correlation.

    Args:
        config: Report settings; not used by this implementation.
        df: The DataFrame whose columns are correlated.
        summary: Variable summaries; not used by this implementation.

    Returns:
        The correlation matrix produced by ``DataFrame.corr`` for the
        numeric columns of ``df``.
    """
    # Filter first: calling ``df.corr`` on the raw frame is exactly what the
    # fix replaces.  ``corr`` only reads its input, so the filtered frame is
    # used directly without an extra ``.copy()``.
    numeric_df = df.select_dtypes(include="number")
    return numeric_df.corr(method="pearson")


@Kendall.compute.register(Settings, pd.DataFrame, dict)
def pandas_kendall_compute(
    config: Settings, df: pd.DataFrame, summary: dict
) -> Optional[pd.DataFrame]:
    """Compute the pairwise Kendall correlation matrix of the numeric columns.

    Only columns selected by ``select_dtypes(include="number")`` take part,
    which keeps string columns out of the correlation.

    Args:
        config: Report settings; not used by this implementation.
        df: The DataFrame whose columns are correlated.
        summary: Variable summaries; not used by this implementation.

    Returns:
        The correlation matrix produced by ``DataFrame.corr`` for the
        numeric columns of ``df``.
    """
    # Filter first: calling ``df.corr`` on the raw frame is exactly what the
    # fix replaces.  ``corr`` only reads its input, so the filtered frame is
    # used directly without an extra ``.copy()``.
    numeric_df = df.select_dtypes(include="number")
    return numeric_df.corr(method="kendall")


def _cramers_corrected_stat(confusion_matrix: pd.DataFrame, correction: bool) -> float:
Expand Down Expand Up @@ -195,7 +198,7 @@ def pandas_auto_compute(

method = (
_pairwise_spearman
if col_1_name and col_2_name not in categorical_columns
if any(elem in categorical_columns for elem in [col_1_name, col_2_name]) is False
else _pairwise_cramers
)

Expand Down

0 comments on commit 1c94d33

Please sign in to comment.