From 77458ce112f62cf7a068d931b94aa3d1b70a9107 Mon Sep 17 00:00:00 2001 From: Anh-Khoa Ngo-Ho Date: Wed, 10 Jan 2024 13:31:31 +0000 Subject: [PATCH] fix: dcor cannot process dataframes with more than 20K rows --- qolmat/benchmark/metrics.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/qolmat/benchmark/metrics.py b/qolmat/benchmark/metrics.py index a646d713..c0b0048d 100644 --- a/qolmat/benchmark/metrics.py +++ b/qolmat/benchmark/metrics.py @@ -1117,6 +1117,11 @@ def distance_anticorr(df1: pd.DataFrame, df2: pd.DataFrame, df_mask: pd.DataFram df1 = df1[df_mask.any(axis=1)] df2 = df2[df_mask.any(axis=1)] + + if len(df1) > 30000: + df1 = df1.sample(20000) + df2 = df2.sample(20000) + return (1 - dcor.distance_correlation(df1.values, df2.values)) / 2