Add Elo difference, LOS, and other statistics to the output

Example: Score of opt vs default: 32 - 18 - 8 [0.62068966] 58 Elo difference: +85.6 +/- 87.1, CI: [-1.5, 172.7], LOS: 97.6%, DrawRatio: 13.8%
fsmosca · Sep 4, 2021 · 61d70d2 · 61d70d2
1 parent 71330a6
commit 61d70d2
Showing 1 changed file with 86 additions and 1 deletion.
diff --git a/tourney_manager/duel/duel.py b/tourney_manager/duel/duel.py
@@ -10,7 +10,7 @@
 
 __author__ = 'fsmosca'
 __script_name__ = 'Duel'
-__version__ = 'v1.17.0'
+__version__ = 'v1.18.0'
 __credits__ = ['musketeerchess']
 
 
@@ -27,6 +27,7 @@
 import multiprocessing
 from datetime import datetime
 import glob
+import math
 
 
 logging.basicConfig(
@@ -35,6 +36,82 @@
     format='%(asctime)s - pid%(process)5d - %(levelname)5s - %(message)s')
 
 
+class Elo:
+    """Elo statistics for a match given as win/loss/draw counts.
+
+    All figures are from the point of view of the player whose wins are
+    passed as ``win``.
+
+    Ref.: https://github.com/cutechess/cutechess/blob/master/projects/lib/src/elo.cpp
+    """
+    def __init__(self, win, loss, draw):
+        """Store the game counts and the mean score per game.
+
+        Args:
+            win: Number of games won.
+            loss: Number of games lost.
+            draw: Number of games drawn.
+
+        Raises:
+            ZeroDivisionError: If the three counts sum to zero.
+        """
+        self.wins = win
+        self.losses = loss
+        self.draws = draw
+        self.n = win + loss + draw
+        # A win scores 1 point, a draw half a point and a loss nothing.
+        self.mu = self.wins/self.n + self.draws/self.n / 2
+
+    def stdev(self):
+        """Return the standard deviation of the mean score ``mu``."""
+        n = self.n
+        wr = self.wins / n
+        lr = self.losses / n
+        dr = self.draws / n
+
+        # Squared distance of each result (1, 0, 0.5) from the mean score,
+        # weighted by how often that result occurred.
+        dev_w = wr * math.pow(1.0 - self.mu, 2.0)
+        dev_l = lr * math.pow(0.0 - self.mu, 2.0)
+        dev_d = dr * math.pow(0.5 - self.mu, 2.0)
+
+        # Per-game deviation divided by sqrt(n) gives the deviation of the
+        # mean over n games.
+        return math.sqrt(dev_w + dev_l + dev_d) / math.sqrt(n)
+
+    def draw_ratio(self):
+        """Return the fraction of games drawn, in the range 0.0 to 1.0."""
+        return self.draws / self.n
+
+    def diff(self, p=None):
+        """Return the Elo difference that corresponds to score rate ``p``.
+
+        Args:
+            p: Score rate to convert. When None, the mean score ``mu`` of
+                the match is used.
+        """
+        p = self.mu if p is None else p
+
+        # Manage extreme values of p: 0.99 or more becomes 0.99 and 0.01 or
+        # less becomes 0.01, so the logarithm below is always defined.
+        # With these limits the rating difference is capped at about 798.
+        p = min(0.99, max(0.01, p))
+        return -400.0 * math.log10(1.0 / p - 1.0)
+
+    def error_margin(self, confidence_level=95):
+        """Return half the width of the Elo confidence interval.
+
+        Args:
+            confidence_level: Confidence level in percent.
+
+        Raises:
+            ValueError: If ``confidence_level`` is 0 or 100, because
+                ``erf_inv`` is then evaluated at -1 or 1.
+        """
+        a = (1 - confidence_level/100) / 2
+        sd = self.stdev()
+        mu_min = self.mu + self.phi_inv(a) * sd
+        mu_max = self.mu + self.phi_inv(1-a) * sd
+        return (self.diff(mu_max) - self.diff(mu_min)) / 2.0
+
+    def erf_inv(self, x):
+        """Return a closed-form approximation of the inverse error function.
+
+        Args:
+            x: Value strictly between -1 and 1.
+
+        Raises:
+            ValueError: If ``abs(x) >= 1`` (logarithm of a non-positive
+                number).
+        """
+        a = 8.0 * (math.pi - 3.0) / (3.0 * math.pi * (4.0 - math.pi))
+        y = math.log(1.0 - x * x)
+        z = 2.0 / (math.pi * a) + y / 2.0
+
+        ret = math.sqrt(math.sqrt(z * z - y / a) - z)
+
+        # The expression above only depends on x * x, so restore the sign.
+        if x < 0.0:
+            return -ret
+        return ret
+
+    def phi_inv(self, p):
+        """Return the standard normal quantile for probability ``p``."""
+        return math.sqrt(2.0) * self.erf_inv(2.0 * p - 1.0)
+
+    def los(self):
+        """LOS - Likelihood Of Superiority, in percent.
+
+        Only decisive games are used. Equal wins and losses give 50, and
+        so does a match without any decisive game.
+        """
+        decisive = self.wins + self.losses
+        if decisive == 0:
+            # No decisive game means no evidence for either side. This is
+            # the same value the formula yields whenever wins == losses.
+            return 50.0
+        return 100 * (0.5 + 0.5 * math.erf((self.wins - self.losses) / math.sqrt(2.0 * decisive)))
+
+    def confidence_interval(self, confidence_level=95, type_='elo'):
+        """Return the (low, high) bounds of the confidence interval.
+
+        Args:
+            confidence_level: Confidence level in percent.
+            type_: 'rate' returns the bounds as expected score rates, any
+                other value returns them as Elo differences.
+        """
+        e = self.diff()
+        em = self.error_margin(confidence_level)
+
+        if type_ == 'rate':
+            return self.expected_score_rate(e-em), self.expected_score_rate(e+em)
+        else:
+            return e-em, e+em
+
+    def expected_score_rate(self, rd):
+        """Return the expected score rate for the rating difference ``rd``."""
+        return 1 / (1 + 10 ** (-rd/400))
+
+
 class Timer:
     def __init__(self, base_time, inc_time):
         """
@@ -427,7 +504,15 @@ def run(self):
 
                     perf = mean(test_engine_score_list)
                     games = len(test_engine_score_list)
+                    cf = 95
+                    elo = Elo(wins, losses, draws)
+                    elodiff = elo.diff()
+                    em = elo.error_margin(cf)
+                    lowci, highci = elo.confidence_interval(cf, 'elo')
+                    los = elo.los()
+                    drawrate = elo.draw_ratio()
                     print(f'Score of {self.e1["name"]} vs {self.e2["name"]}: {wins} - {losses} - {draws}  [{perf:0.8f}] {games}')
+                    print(f'Elo difference: {elodiff:+0.1f} +/- {em:0.1f}, CI: [{lowci:0.1f}, {highci:0.1f}], LOS: {los:0.1f}%, DrawRatio: {100*drawrate:0.1f}%')
                 except concurrent.futures.process.BrokenProcessPool as ex:
                     print(f'exception: {ex}')