Skip to content

Commit

Permalink
Replace skills with a new scheme to limit strength
Browse files Browse the repository at this point in the history
based on Sopel's initial implementation discussed in official-stockfish#3635

In this new scheme, the strength of the engine is limited by replacing a (varying) part of the evaluation
with a random perturbation. This scheme is easier to implement than our current skill level implementation,
and has the advantage that it has a wider Elo range, being both weaker than skill level 1 and stronger than skill level 19.

The skill level option is removed, and instead UCI_Elo and UCI_LimitStrength are the only options available.

UCI_Elo is calibrated such that 1500 Elo is equivalent in strength to the engine maia1 (https://lichess.org/@/maia1)
which has a blitz rating on lichess of 1500 (based on nearly 600k human games). The full Elo range (750 - 5200) is obtained by playing
games between engines roughly 100-200 elo apart with the perturbation going from 0 to 1000, and fitting the ordo results. With this fit,
a conversion from UCI_Elo to the magnitude of the random perturbation is possible.
All games are played at lichess blitz TC (5m+3s), and playing strength is different at different TCs.
Indeed, maia1 is a fixed 1 node leela 'search', independent from TC, whereas this scheme searches normally, and improves with TC.

There are a few caveats. It is unclear what the playing style of the engine will be like: the old skill level was not really satisfactory, and it remains to be seen whether this approach fixes that. Furthermore, while maia1 and SF@1500Elo are equivalent in strength in engine-engine matches (at blitz TC), it is not certain that its rating against humans will be the same (engine Elo and human Elo can be very different).

No functional change
  • Loading branch information
vondele committed Sep 11, 2021
1 parent b7b6b4b commit caa7faa
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 74 deletions.
21 changes: 21 additions & 0 deletions src/evaluate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <ctime>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <random>
#include <sstream>
#include <streambuf>
#include <vector>
Expand Down Expand Up @@ -61,6 +62,8 @@ namespace Stockfish {
namespace Eval {

// Evaluation-wide settings. All of them have static storage duration, so the
// ones without an initializer start out zero-initialized (false / 0).

// Presumably selects NNUE evaluation over the classical one - TODO confirm
// against the code that reads it.
bool useNNUE;
// Set from the UCI_LimitStrength option; when true, evaluate() passes its
// result through randomly_perturbed_eval().
bool limitStrength;
// Weight, in thousandths, of the random term mixed into the evaluation by
// randomly_perturbed_eval(). Written by the UCI_Elo option handler.
// NOTE(review): stays 0 (no perturbation) until that handler has run -
// confirm the default UCI_Elo value is applied at startup.
int randomEvalPerturb;
// Starts out as "None".
string eval_file_loaded = "None";

/// NNUE::init() tries to load a NNUE network at startup time, or when the engine
Expand Down Expand Up @@ -1075,6 +1078,20 @@ namespace {
: -Value(correction);
}

// Randomly perturb the evaluation in a calibrated way to yield a weaker engine.
//
// Returns the weighted average
//     (randomEvalPerturb * noise + (1000 - randomEvalPerturb) * v) / 1000
// where noise is drawn from a normal distribution with mean 0 and standard
// deviation QueenValueEg. Eval::randomEvalPerturb is the share, in thousandths,
// of the evaluation that is replaced by noise: 0 returns v unchanged, 1000
// returns the noise term alone.
Value randomly_perturbed_eval(Value v)
{
  // One generator per thread. Seeding from the wall clock alone would hand every
  // thread created within the same second an identical noise stream, so the seed
  // also draws from std::random_device. The clock stays in the mix because
  // random_device is allowed to be deterministic on some implementations.
  static thread_local std::mt19937_64 tls_rng = [](){
    std::random_device entropy;
    std::seed_seq seed{ entropy(), entropy(),
                        static_cast<std::random_device::result_type>(std::time(nullptr)) };
    return std::mt19937_64(seed);
  }();

  std::normal_distribution<float> d(0.0, QueenValueEg);
  float r = d(tls_rng);

  // linearly combine the random term with the real evaluation
  return (Eval::randomEvalPerturb * Value(r) + (1000 - Eval::randomEvalPerturb) * v) / 1000;
}

} // namespace Eval


Expand Down Expand Up @@ -1117,6 +1134,10 @@ Value Eval::evaluate(const Position& pos) {
// Damp down the evaluation linearly when shuffling
v = v * (100 - pos.rule50_count()) / 100;

// Optionally, limit the playing strength by perturbing the evaluation
if (Eval::limitStrength)
v = randomly_perturbed_eval(v);

// Guarantee evaluation does not hit the tablebase range
v = std::clamp(v, VALUE_TB_LOSS_IN_MAX_PLY + 1, VALUE_TB_WIN_IN_MAX_PLY - 1);

Expand Down
2 changes: 2 additions & 0 deletions src/evaluate.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ namespace Eval {
Value evaluate(const Position& pos);

// Evaluation settings shared with other translation units; the definitions
// live in evaluate.cpp.
extern bool useNNUE;
// When true, evaluate() perturbs its result to weaken play.
extern bool limitStrength;
// Weight, in thousandths, of the random perturbation; set from UCI_Elo.
extern int randomEvalPerturb;
extern std::string eval_file_loaded;

// The default net name MUST follow the format nn-[SHA256 first 12 digits].nnue
Expand Down
73 changes: 1 addition & 72 deletions src/search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,17 +91,6 @@ namespace {
return VALUE_DRAW + Value(2 * (thisThread->nodes & 1) - 1);
}

// Strength-handicap state: the configured level together with the move that
// was picked for it. A level of 20 or more means no handicap is applied.
struct Skill {

  explicit Skill(int l) : level{l} {}

  // The handicap is active only below level 20
  bool enabled() const {
    return level < 20;
  }

  // True at exactly one search depth: one more than the level
  bool time_to_pick(Depth depth) const {
    return depth == level + 1;
  }

  // Defined out of line; chooses among the first multiPV root moves
  Move pick_best(size_t multiPV);

  int level;
  Move best = MOVE_NONE;
};

template <NodeType nodeType>
Value search(Position& pos, Stack* ss, Value alpha, Value beta, Depth depth, bool cutNode);

Expand Down Expand Up @@ -225,7 +214,6 @@ void MainThread::search() {

if ( int(Options["MultiPV"]) == 1
&& !Limits.depth
&& !(Skill(Options["Skill Level"]).enabled() || int(Options["UCI_LimitStrength"]))
&& rootMoves[0].pv[0] != MOVE_NONE)
bestThread = Threads.get_best_thread();

Expand Down Expand Up @@ -290,26 +278,8 @@ void Thread::search() {
std::fill(&lowPlyHistory[MAX_LPH - 2][0], &lowPlyHistory.back().back() + 1, 0);

size_t multiPV = size_t(Options["MultiPV"]);

// Pick integer skill levels, but non-deterministically round up or down
// such that the average integer skill corresponds to the input floating point one.
// UCI_Elo is converted to a suitable fractional skill level, using anchoring
// to CCRL Elo (goldfish 1.13 = 2000) and a fit through Ordo derived Elo
// for match (TC 60+0.6) results spanning a wide range of k values.
PRNG rng(now());
double floatLevel = Options["UCI_LimitStrength"] ?
std::clamp(std::pow((Options["UCI_Elo"] - 1346.6) / 143.4, 1 / 0.806), 0.0, 20.0) :
double(Options["Skill Level"]);
int intLevel = int(floatLevel) +
((floatLevel - int(floatLevel)) * 1024 > rng.rand<unsigned>() % 1024 ? 1 : 0);
Skill skill(intLevel);

// When playing with strength handicap enable MultiPV search that we will
// use behind the scenes to retrieve a set of possible moves.
if (skill.enabled())
multiPV = std::max(multiPV, (size_t)4);

multiPV = std::min(multiPV, rootMoves.size());

ttHitAverage = TtHitAverageWindow * TtHitAverageResolution / 2;

trend = SCORE_ZERO;
Expand Down Expand Up @@ -445,10 +415,6 @@ void Thread::search() {
if (!mainThread)
continue;

// If skill level is enabled and time is up, pick a sub-optimal best move
if (skill.enabled() && skill.time_to_pick(rootDepth))
skill.pick_best(multiPV);

// Do we have time for the next iteration? Can we stop searching now?
if ( Limits.use_time_management()
&& !Threads.stop
Expand Down Expand Up @@ -504,10 +470,6 @@ void Thread::search() {

mainThread->previousTimeReduction = timeReduction;

// If skill level is enabled, swap best PV line with the sub-optimal one
if (skill.enabled())
std::swap(rootMoves[0], *std::find(rootMoves.begin(), rootMoves.end(),
skill.best ? skill.best : skill.pick_best(multiPV)));
}


Expand Down Expand Up @@ -1729,39 +1691,6 @@ namespace {
thisThread->lowPlyHistory[ss->ply][from_to(move)] << stat_bonus(depth - 7);
}

// Strength-handicap move selection: choose one of the first multiPV root moves
// by a statistical rule that depends on 'level', store it in 'best' and return
// it. Idea by Heinz van Saanen.

Move Skill::pick_best(size_t multiPV) {

  const RootMoves& rootMoves = Threads.main()->rootMoves;
  static PRNG rng(now()); // Clock-seeded so the sequence differs between runs

  // The root moves arrive sorted by descending score, so index 0 is the best
  const Value bestScore = rootMoves[0].score;
  const int spread = std::min(bestScore - rootMoves[multiPV - 1].score, PawnValueMg);
  const int weakness = 120 - 2 * level;
  int highest = -VALUE_INFINITE;

  for (size_t idx = 0; idx < multiPV; ++idx)
  {
      const auto& candidate = rootMoves[idx];

      // The bonus has a deterministic part, growing with the gap to the best
      // move and with weakness, and a random part scaled by the score spread.
      const unsigned dice = rng.rand<unsigned>() % weakness;
      const int bonus = (  weakness * int(bestScore - candidate.score)
                         + spread * dice) / 128;

      const int boosted = candidate.score + bonus;
      if (boosted < highest)
          continue;

      // Ties go to the later move, hence the strict comparison above
      highest = boosted;
      best = candidate.pv[0];
  }

  return best;
}

} // namespace


Expand Down
16 changes: 14 additions & 2 deletions src/ucioption.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <algorithm>
#include <cassert>
#include <cmath>
#include <ostream>
#include <sstream>

Expand All @@ -37,12 +38,23 @@ UCI::OptionsMap Options; // Global object

namespace UCI {


// Parameters of the UCI_Elo -> evaluation-perturbation mapping in on_uci_elo().
// Power applied to the distance between the requested Elo and Elo_max.
constexpr float exponent = 0.66;
// Upper bound of the UCI_Elo option; maps to a perturbation of 0.
constexpr int Elo_max = 5200;
// Lower bound of the UCI_Elo option; maps to a perturbation of 1000.
constexpr int Elo_min = 750;

/// 'On change' actions, triggered by an option's value change.
/// Each handler receives the option that changed and forwards its value,
/// where it needs one, to the subsystem concerned.

// Calls Search::clear(); the option value itself is not read.
void on_clear_hash(const Option&) { Search::clear(); }
// Resizes the transposition table to the option value, converted to size_t.
void on_hash_size(const Option& o) { TT.resize(size_t(o)); }
// Hands the option to start_logger().
void on_logger(const Option& o) { start_logger(o); }
// Passes the option value, converted to size_t, to Threads.set().
void on_threads(const Option& o) { Threads.set(size_t(o)); }
// Hands the option to Tablebases::init().
void on_tb_path(const Option& o) { Tablebases::init(o); }
// Stores the option value in Eval::limitStrength, which evaluate() checks
// before perturbing its result.
void on_limit_strength(const Option& o) { Eval::limitStrength = o; }
// Converts the requested UCI_Elo into Eval::randomEvalPerturb, computed as
//     1000 * (Elo_max - elo)^exponent / (Elo_max - Elo_min)^exponent
// and truncated to int. Elo_max therefore gives 0 and Elo_min gives 1000.
void on_uci_elo(const Option& o) {
  const double fullRange = std::pow(Elo_max - Elo_min, exponent);
  const double distance  = std::pow(Elo_max - o , exponent);

  Eval::randomEvalPerturb = int(1000 * distance / fullRange);
}

// Both NNUE-related options simply re-run Eval::NNUE::init(); the changed
// option is not read here.
void on_use_NNUE(const Option& ) { Eval::NNUE::init(); }
void on_eval_file(const Option& ) { Eval::NNUE::init(); }

Expand Down Expand Up @@ -72,8 +84,8 @@ void init(OptionsMap& o) {
o["nodestime"] << Option(0, 0, 10000);
o["UCI_Chess960"] << Option(false);
o["UCI_AnalyseMode"] << Option(false);
o["UCI_LimitStrength"] << Option(false);
o["UCI_Elo"] << Option(1350, 1350, 2850);
o["UCI_LimitStrength"] << Option(false, on_limit_strength);
o["UCI_Elo"] << Option(1000, Elo_min , Elo_max, on_uci_elo);
o["UCI_ShowWDL"] << Option(false);
o["SyzygyPath"] << Option("<empty>", on_tb_path);
o["SyzygyProbeDepth"] << Option(1, 1, 100);
Expand Down

0 comments on commit caa7faa

Please sign in to comment.