Skip to content

Commit

Permalink
add explanation on test score calculation
Browse files Browse the repository at this point in the history
Signed-off-by: Samuel Herman <[email protected]>
  • Loading branch information
samuel-oci committed Nov 17, 2023
1 parent 21954c9 commit 8d7c3d9
Showing 1 changed file with 13 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ public class ZScoreNormalizationTechniqueTests extends OpenSearchQueryTestCase {
private static final float DELTA_FOR_ASSERTION = 0.0001f;

/**
* Z score will check the relative distance from the center of distribution and hence can also be negative.
* Z score measures the relative distance from the center of the distribution in units of standard deviation
* and hence can also be negative. It uses the formula (score - mean_score) / std.
* When only two values are available, their z-scores will be 1 and -1 respectively.
* For more information regarding z-score you can check this link
* https://www.z-table.com/
Expand Down Expand Up @@ -54,6 +55,15 @@ public void testNormalization_whenResultFromOneShardOneSubQuery_thenSuccessful()
);
}

/**
* Z score measures the relative distance from the center of the distribution in units of standard deviation
* and hence can also be negative. It uses the formula (score - mean_score) / std.
* When only two values are available, their z-scores will be 1 and -1 respectively, as we see in the first query, which returns only two document scores.
* When there are more than two document scores, as in the second query, the z-scores are no longer limited to 1 and -1 and depend on where the mean (the center of gravity) of the score distribution lies.
* For more information regarding z-score you can check this link
* https://www.z-table.com/
*
*/
public void testNormalization_whenResultFromOneShardMultipleSubQueries_thenSuccessful() {
ZScoreNormalizationTechnique normalizationTechnique = new ZScoreNormalizationTechnique();
List<CompoundTopDocs> compoundTopDocs = List.of(
Expand All @@ -79,11 +89,13 @@ public void testNormalization_whenResultFromOneShardMultipleSubQueries_thenSucce
List.of(
new TopDocs(
new TotalHits(2, TotalHits.Relation.EQUAL_TO),
// Calculated based on the formula (score - mean_score)/std
new ScoreDoc[] { new ScoreDoc(2, 1.0f), new ScoreDoc(4, -1.0f) }
),
new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]),
new TopDocs(
new TotalHits(3, TotalHits.Relation.EQUAL_TO),
// Calculated based on the formula (score - mean_score)/std, where mean_score = (0.9 + 0.7 + 0.1)/3 ~ 0.567 and std = sqrt(((0.9 - 0.567)^2 + (0.7 - 0.567)^2 + (0.1 - 0.567)^2)/3) ~ 0.340 (population standard deviation, i.e. dividing by N = 3)
new ScoreDoc[] { new ScoreDoc(3, 0.98058068f), new ScoreDoc(4, 0.39223227f), new ScoreDoc(2, -1.37281295f) }
)
)
Expand Down

0 comments on commit 8d7c3d9

Please sign in to comment.