Skip to content

Commit

Permalink
Add Elo difference, LOS, and other statistics to the output
Browse files Browse the repository at this point in the history
Example:

Score of opt vs default: 32 - 18 - 8  [0.62068966] 58
Elo difference: +85.6 +/- 87.1, CI: [-1.5, 172.7], LOS: 97.6%, DrawRatio: 13.8%
  • Loading branch information
fsmosca committed Sep 4, 2021
1 parent 71330a6 commit 61d70d2
Showing 1 changed file with 86 additions and 1 deletion.
87 changes: 86 additions & 1 deletion tourney_manager/duel/duel.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

__author__ = 'fsmosca'
__script_name__ = 'Duel'
__version__ = 'v1.17.0'
__version__ = 'v1.18.0'
__credits__ = ['musketeerchess']


Expand All @@ -27,6 +27,7 @@
import multiprocessing
from datetime import datetime
import glob
import math


logging.basicConfig(
Expand All @@ -35,6 +36,82 @@
format='%(asctime)s - pid%(process)5d - %(levelname)5s - %(message)s')


class Elo:
    """Elo statistics for a match described by win/loss/draw counts.

    All figures are from the point of view of the player whose wins are
    passed as ``win``.

    Ref.: https://github.com/cutechess/cutechess/blob/master/projects/lib/src/elo.cpp
    """

    def __init__(self, win, loss, draw):
        """Store the game counts and derive the mean score per game.

        Args:
            win: Number of games won.
            loss: Number of games lost.
            draw: Number of games drawn.
        """
        self.wins = win
        self.losses = loss
        self.draws = draw
        self.n = win + loss + draw

        # Mean score per game: a win counts 1, a draw 0.5 and a loss 0.
        # Without any game there is nothing to average, so use the neutral
        # score 0.5 instead of dividing by zero.
        if self.n:
            self.mu = self.wins/self.n + self.draws/self.n / 2
        else:
            self.mu = 0.5

    def stdev(self) -> float:
        """Return the standard error of the mean score (0.0 without games)."""
        n = self.n
        if not n:
            return 0.0

        wr = self.wins / n
        lr = self.losses / n
        dr = self.draws / n

        # Variance of a single game result around the mean score, weighted
        # by the observed frequency of each outcome.
        dev_w = wr * math.pow(1.0 - self.mu, 2.0)
        dev_l = lr * math.pow(0.0 - self.mu, 2.0)
        dev_d = dr * math.pow(0.5 - self.mu, 2.0)

        return math.sqrt(dev_w + dev_l + dev_d) / math.sqrt(n)

    def draw_ratio(self) -> float:
        """Return the fraction of drawn games (0.0 without games)."""
        if not self.n:
            return 0.0
        return self.draws / self.n

    def diff(self, p=None) -> float:
        """Return the Elo difference that corresponds to score rate ``p``.

        Args:
            p: Score rate to convert. Defaults to the mean score of the match.
        """
        p = self.mu if p is None else p

        # Manage extreme values of p: 1.0 or more becomes 0.99 and 0 or below
        # becomes 0.01, which keeps the logarithm finite. With these limits
        # the maximum rating difference is about 800 (798.2).
        p = min(0.99, max(0.01, p))
        return -400.0 * math.log10(1.0 / p - 1.0)

    def error_margin(self, confidence_level=95) -> float:
        """Return half the width of the Elo confidence interval.

        Args:
            confidence_level: Two-sided confidence level in percent,
                0 <= confidence_level < 100.

        Raises:
            ValueError: If ``confidence_level`` is outside [0, 100).
        """
        if not 0 <= confidence_level < 100:
            raise ValueError(
                f'confidence_level must be in [0, 100), got {confidence_level}')

        a = (1 - confidence_level/100) / 2
        sd = self.stdev()
        mu_min = self.mu + self.phi_inv(a) * sd
        mu_max = self.mu + self.phi_inv(1-a) * sd
        return (self.diff(mu_max) - self.diff(mu_min)) / 2.0

    def erf_inv(self, x) -> float:
        """Return an approximation of the inverse error function at ``x``.

        Closed-form approximation (the constant matches Winitzki's formula),
        the same one used by the cutechess reference.

        Raises:
            ValueError: If ``abs(x) >= 1`` (the logarithm is undefined there).
        """
        a = 8.0 * (math.pi - 3.0) / (3.0 * math.pi * (4.0 - math.pi))
        y = math.log(1.0 - x * x)
        z = 2.0 / (math.pi * a) + y / 2.0

        ret = math.sqrt(math.sqrt(z * z - y / a) - z)

        # The formula only yields the magnitude; restore the sign of x.
        if x < 0.0:
            return -ret
        return ret

    def phi_inv(self, p) -> float:
        """Return the approximate standard normal quantile for probability ``p``."""
        return math.sqrt(2.0) * self.erf_inv(2.0 * p - 1.0)

    def los(self) -> float:
        """LOS - Likelihood Of Superiority, in percent."""
        decisive = self.wins + self.losses
        if decisive == 0:
            # No decisive game gives no evidence for either side. 50% is also
            # what the formula below yields whenever wins == losses.
            return 50.0
        return 100 * (0.5 + 0.5 * math.erf((self.wins - self.losses) / math.sqrt(2.0 * decisive)))

    def confidence_interval(self, confidence_level=95, type_='elo'):
        """Return the (low, high) bounds of the confidence interval.

        Args:
            confidence_level: Two-sided confidence level in percent.
            type_: 'rate' to get the bounds as expected score rates; any
                other value returns them as Elo differences.
        """
        e = self.diff()
        em = self.error_margin(confidence_level)

        if type_ == 'rate':
            return self.expected_score_rate(e-em), self.expected_score_rate(e+em)
        return e-em, e+em

    def expected_score_rate(self, rd) -> float:
        """Return the expected score rate for the rating difference ``rd``."""
        return 1 / (1 + 10 ** (-rd/400))


class Timer:
def __init__(self, base_time, inc_time):
"""
Expand Down Expand Up @@ -427,7 +504,15 @@ def run(self):

perf = mean(test_engine_score_list)
games = len(test_engine_score_list)
cf = 95
elo = Elo(wins, losses, draws)
elodiff = elo.diff()
em = elo.error_margin(cf)
lowci, highci = elo.confidence_interval(cf, 'elo')
los = elo.los()
drawrate = elo.draw_ratio()
print(f'Score of {self.e1["name"]} vs {self.e2["name"]}: {wins} - {losses} - {draws} [{perf:0.8f}] {games}')
print(f'Elo difference: {elodiff:+0.1f} +/- {em:0.1f}, CI: [{lowci:0.1f}, {highci:0.1f}], LOS: {los:0.1f}%, DrawRatio: {100*drawrate:0.1f}%')
except concurrent.futures.process.BrokenProcessPool as ex:
print(f'exception: {ex}')

Expand Down

0 comments on commit 61d70d2

Please sign in to comment.