-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtrainer_anneal.pyx
72 lines (59 loc) · 2.74 KB
/
trainer_anneal.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""
A "probabilistic", simplified simulated annealing.
At each step a number of parameters are redrawn and the bot is evaluated with
the new parameters. Then a random threshold is drawn and scaled by a power of
the fraction of the training steps left. The resulting value is compared with
the change in returns: if the growth or decline of the total score is greater
than the scaled threshold, the new params are kept; otherwise the search
continues with the previous set.
The config should consist of three values: the number of steps (integer),
an easing factor for the effects of the acceptance distribution (float),
and a variation scale (float).
The thresholds drawn from the acceptance distribution are multiplied by the
fraction of the training steps left raised to the power of the easing factor;
thus a factor of 0 lets declines be accepted (with some probability) from the
start till the end of the training session, while a high factor only allows
declines to be accepted at the beginning of it.
If the variation scale is less than 1, new param entries are interpolated
between newly drawn values and their old values (rather than being redrawn
anew).
Two probability distributions are used: --dist_variations controls the number
of parameters changed at once at each step, and --dist_acceptance decides
whether to continue with a mutated bot or keep the previous one.
"""
from cython import ccall, cclass, returns
from trainer_local cimport Trainer as TrainerLocal
@cclass
class Trainer(TrainerLocal):
    """Simplified, probabilistic simulated-annealing trainer.

    Each seed bot is annealed on its own for ``steps`` iterations; the
    parameters of the seed whose annealing run ends with the highest score
    are returned.
    """

    # Config values this trainer expects, as (name, type) pairs.
    # Presumably consumed by the base class / framework to populate
    # self.steps, self.acceptance_ease and self.change -- verify there.
    arguments = (
        ('steps', int),
        ('acceptance_ease', float),
        ('change', float),
    )

    @ccall
    @returns('tuple')
    def train(self):
        """Anneal every seed and return the best outcome.

        For each ``(bot, history)`` pair in ``self.seeds`` the bot is scored
        with ``evaluate(self.runs)`` and then mutated ``self.steps`` times.
        A mutated clone replaces the current bot whenever its score change
        exceeds a threshold drawn from the ``'acceptance'`` distribution and
        scaled by ``(1 - step / steps) ** acceptance_ease``.

        Returns:
            A ``(params, history)`` tuple: the ``params`` of the bot that a
            seed's annealing run ended on with the highest score, and the
            history object that came with that seed.

        Raises:
            ValueError: if no bot could be selected, i.e. ``self.seeds`` is
                empty or no seed finished with a score greater than ``-inf``
                (for instance when every evaluation yields NaN).
        """
        variations_dist = self.dists['variations']
        acceptance_dist = self.dists['acceptance']

        # Loop invariants, read once instead of on every step.
        steps = self.steps
        change = self.change

        best_score = float('-inf')
        best_bot = None
        best_history = []

        for seed_bot, history in self.seeds:
            best_seed_score = seed_bot.evaluate(self.runs)
            best_seed_bot = seed_bot

            for step in range(steps):
                # Mutate a clone so a rejected candidate leaves the current
                # bot untouched.
                candidate = best_seed_bot.clone(state=False)
                # Always vary at least one parameter.
                variations = max(1, round(variations_dist.rvs()))
                candidate.vary_params(
                    self.dists, self.emphases, change, variations)
                score = candidate.evaluate(self.runs)
                improvement = score - best_seed_score

                # Goes from 1 on the first step towards 0 on the last one;
                # float() keeps this a true division whatever the division
                # semantics the module is compiled with.
                mult = pow(1 - float(step) / steps, self.acceptance_ease)
                if acceptance_dist.rvs() * mult < improvement:
                    best_seed_score = score
                    best_seed_bot = candidate

            # NOTE(review): the state a seed *ends* on is what gets compared,
            # not the best state visited along the way. The nesting of this
            # check was reconstructed from flattened source -- confirm.
            if best_seed_score > best_score:
                best_score = best_seed_score
                best_bot = best_seed_bot
                best_history = history

        if best_bot is None:
            # Without this guard the return below fails with an opaque
            # AttributeError on None.
            raise ValueError(
                'annealing selected no bot: there were no seeds, or no seed '
                'finished with a score above -inf')

        return best_bot.params, best_history