Skip to content

Commit

Permalink
Better UI for small datasets, or datasets with no variation. Defaulti…
Browse files Browse the repository at this point in the history
…ng to 0 wasn't sound.
  • Loading branch information
scosman committed Feb 28, 2025
1 parent 7f19ffe commit 9e31b8c
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 34 deletions.
30 changes: 15 additions & 15 deletions app/desktop/studio_server/correlation_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ class CorrelationResult:
mean_normalized_absolute_error: float
mean_squared_error: float
mean_normalized_squared_error: float
spearman_correlation: float
pearson_correlation: float
kendalltau_correlation: float
spearman_correlation: float | None
pearson_correlation: float | None
kendalltau_correlation: float | None


class CorrelationCalculator:
Expand Down Expand Up @@ -71,40 +71,40 @@ def calculate_mean_normalized_squared_error(self) -> float:
)
return total_normalized_squared_error / len(self.scores)

def calculate_spearman_correlation(self) -> float:
def calculate_spearman_correlation(self) -> float | None:
if len(self.scores) < 2:
# If there is only one pair, return 0 = no correlation
return 0
# If there is only one pair, no correlation
return None
x = [score.measured_score for score in self.scores]
y = [score.human_score for score in self.scores]
result = stats.spearmanr(x, y)
# library doesn't support proper types
correlation = result.__getattribute__("correlation")
if math.isnan(correlation) or not isinstance(correlation, float):
# Very small samples may have a NaN result (unknown correlation)
return 0
return None
return correlation

def calculate_pearson_correlation(self) -> float:
def calculate_pearson_correlation(self) -> float | None:
if len(self.scores) < 2:
# If there is only one pair, return 0 = no correlation
return 0
# If there is only one pair, no correlation
return None
x = [score.measured_score for score in self.scores]
y = [score.human_score for score in self.scores]
result = stats.pearsonr(x, y)
if math.isnan(result.correlation):
# Very small samples may have a NaN result (unknown correlation)
return 0
return None
return result.correlation

def calculate_kendalltau_correlation(self) -> float:
def calculate_kendalltau_correlation(self) -> float | None:
if len(self.scores) < 2:
# If there is only one pair, return 0 = no correlation
return 0
# If there is only one pair, no correlation
return None
x = [score.measured_score for score in self.scores]
y = [score.human_score for score in self.scores]
result = stats.kendalltau(x, y)
if math.isnan(result.correlation):
# Very small samples may have a NaN result (unknown correlation)
return 0
return None
return result.correlation
6 changes: 3 additions & 3 deletions app/desktop/studio_server/test_correlation_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,9 +194,9 @@ def test_single_data_point(self, single_data_point):
assert result.mean_normalized_absolute_error == 0.0
assert result.mean_squared_error == 0.0
assert result.mean_normalized_squared_error == 0.0
assert result.spearman_correlation == 0.0
assert result.pearson_correlation == 0.0
assert result.kendalltau_correlation == 0.0
assert result.spearman_correlation is None
assert result.pearson_correlation is None
assert result.kendalltau_correlation is None

def test_two_data_points(self, two_data_points):
"""Test correlation calculations with two data points"""
Expand Down
24 changes: 12 additions & 12 deletions app/desktop/studio_server/test_eval_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -925,18 +925,18 @@ class EvalCondigSummaryTestData:
"mean_absolute_error": 4.0, # error 4.0
"mean_normalized_squared_error": 1, # max error: 1 v 5
"mean_normalized_absolute_error": 1, # max error: 1 v 5
"spearman_correlation": 0, # default value for 1 pair
"pearson_correlation": 0,
"kendalltau_correlation": 0,
"spearman_correlation": None, # Not enough data
"pearson_correlation": None,
"kendalltau_correlation": None,
},
"score1": {
"mean_squared_error": 2.25, # error (3.5-5.0)^2
"mean_absolute_error": 1.5, # error 1.5
"mean_normalized_squared_error": 0.140625, # hand calc
"mean_normalized_absolute_error": 0.375, # 1.5/4
"spearman_correlation": 0, # default value for 1 pair
"pearson_correlation": 0,
"kendalltau_correlation": 0,
"spearman_correlation": None, # Not enough data
"pearson_correlation": None, # Not enough data
"kendalltau_correlation": None, # Not enough data
},
}
# 1 of total_in_dataset eval configs are in ec1 test
Expand All @@ -949,9 +949,9 @@ class EvalCondigSummaryTestData:
"mean_absolute_error": 1.5, # (1+2)/2
"mean_normalized_squared_error": 0.15625, # (0.25^2 + 0.5^2) / 2
"mean_normalized_absolute_error": 0.375, # (0.25 + 0.5) / 2
"spearman_correlation": 0,
"pearson_correlation": 0,
"kendalltau_correlation": 0,
"spearman_correlation": None,
"pearson_correlation": None,
"kendalltau_correlation": None,
},
"score1": {
"mean_squared_error": 2.5, # (1^2+2^2)/2
Expand All @@ -973,9 +973,9 @@ class EvalCondigSummaryTestData:
"mean_absolute_error": 2,
"mean_normalized_squared_error": 0.25,
"mean_normalized_absolute_error": 0.5,
"spearman_correlation": 0,
"pearson_correlation": 0,
"kendalltau_correlation": 0,
"spearman_correlation": None,
"pearson_correlation": None,
"kendalltau_correlation": None,
},
}
# 2 of total_in_dataset eval configs are in ec2 test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -518,11 +518,17 @@
{:else if score_type === "norm_mae"}
{scores.mean_normalized_absolute_error.toFixed(3)}
{:else if score_type === "spearman"}
{scores.spearman_correlation.toFixed(3)}
<!-- Check for a number rather than truthiness: a correlation of exactly 0 is a valid score and must render as "0.000", not "N/A". Only a missing (null/undefined) value shows "N/A". -->
{typeof scores.spearman_correlation === "number"
? scores.spearman_correlation.toFixed(3)
: "N/A"}
{:else if score_type === "pearson"}
{scores.pearson_correlation.toFixed(3)}
<!-- Check for a number rather than truthiness: a correlation of exactly 0 is a valid score and must render as "0.000", not "N/A". Only a missing (null/undefined) value shows "N/A". -->
{typeof scores.pearson_correlation === "number"
? scores.pearson_correlation.toFixed(3)
: "N/A"}
{:else if score_type === "kendalltau"}
{scores.kendalltau_correlation.toFixed(3)}
<!-- Check for a number rather than truthiness: a correlation of exactly 0 is a valid score and must render as "0.000", not "N/A". Only a missing (null/undefined) value shows "N/A". -->
{typeof scores.kendalltau_correlation === "number"
? scores.kendalltau_correlation.toFixed(3)
: "N/A"}
{/if}
{:else}
unknown
Expand Down Expand Up @@ -593,7 +599,8 @@
These are three scientific correlation coefficients. For all three, the
value tends to be high (close to 1) for samples with a strongly positive
correlation, low (close to -1) for samples with a strongly negative
correlation, and close to zero for samples with weak correlation.
correlation, and close to zero for samples with weak correlation. Scores may
be 'N/A' if there are too few samples or not enough variation in scores.
</div>
<ul class="list-disc text-sm text-gray-500 pl-5 pt-2">
<li>
Expand Down

0 comments on commit 9e31b8c

Please sign in to comment.