From f9623e5f52f24080a28d699c2beb4e67542e4409 Mon Sep 17 00:00:00 2001 From: Viren6 <94880762+Viren6@users.noreply.github.com> Date: Sat, 17 Aug 2024 08:26:22 +0100 Subject: [PATCH] Switch from cutechess-cli to fastchess This PR switches from cutechess-cli to fast-chess. cutechess-cli has been serving us well in the past years; however, some issues have accumulated, namely the difficulty of compiling cutechess-cli, the observed timeouts at high concurrency and short TC, and e.g. slowness when indexing larger books. fast-chess https://github.com/Disservin/fast-chess has addressed these issues, and has now probably become mature enough to serve as the game manager for montytest. As an example of its ability to deal with short TC and high concurrency: https://dfts-0.pigazzini.it/tests/view/669249cdbee8253775cede32 with concurrency 25, and TC 1+0.01s no timeouts are observed. fast-chess is built from sources, with the zip download as well as the binary cached as needed. There is fine-grained control over which version of fast-chess is used, so we can easily upgrade for new features. In this PR, fast-chess is built in cutechess compatibility mode to facilitate integration, and to benefit from the existing montytest checks. Once validated, we should be able to switch easily to its native mode, which can output trinomial and pentanomial results, and we should be able to significantly simplify the worker's book-keeping. 
Co-Authored-By: Joost VandeVondele <4202567+vondele@users.noreply.github.com> --- .github/workflows/worker_msys2.yaml | 24 ++- .github/workflows/worker_posix.yaml | 3 +- AUTHORS | 7 +- server/montytest/api.py | 2 +- server/montytest/rundb.py | 17 -- worker/games.py | 270 ++++++++++------------------ worker/sri.txt | 2 +- worker/tests/test_worker.py | 8 +- worker/worker.py | 209 ++++++++++----------- 9 files changed, 240 insertions(+), 302 deletions(-) diff --git a/.github/workflows/worker_msys2.yaml b/.github/workflows/worker_msys2.yaml index 83c79ffe..1beea102 100644 --- a/.github/workflows/worker_msys2.yaml +++ b/.github/workflows/worker_msys2.yaml @@ -1,4 +1,4 @@ -name: CI worker cargo +name: CI worker msys2 on: [push, pull_request, workflow_dispatch] @@ -6,16 +6,28 @@ jobs: test: runs-on: windows-latest strategy: + fail-fast: false matrix: - toolchain: [stable, beta, nightly] + include: + - { sys: mingw64, env: x86_64, comp: gcc } defaults: run: - shell: bash + shell: msys2 {0} working-directory: worker steps: - - name: Checkout code - uses: actions/checkout@v4 + - name: Setup msys and install required packages + uses: msys2/setup-msys2@v2 + with: + update: true + msystem: ${{ matrix.sys }} + install: >- + make + mingw-w64-${{ matrix.env }}-python3 + mingw-w64-${{ matrix.env }}-${{ matrix.comp }} + mingw-w64-x86_64-rust + + - uses: actions/checkout@v4 with: fetch-depth: 0 @@ -25,4 +37,4 @@ jobs: - name: Run worker tests run: | - python -m unittest discover -vb -s tests + python -m unittest discover -vb -s tests \ No newline at end of file diff --git a/.github/workflows/worker_posix.yaml b/.github/workflows/worker_posix.yaml index 643bad0f..7589d8bd 100644 --- a/.github/workflows/worker_posix.yaml +++ b/.github/workflows/worker_posix.yaml @@ -6,6 +6,7 @@ jobs: test: runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: os: [ubuntu-20.04, macos-13] python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] @@ -29,4 +30,4 @@ jobs: - name: Run 
worker tests run: | - python -m unittest discover -vb -s tests + python -m unittest discover -vb -s tests \ No newline at end of file diff --git a/AUTHORS b/AUTHORS index 2beebf05..05c8f998 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,6 +1,7 @@ -# List of authors for montytest, as of December 14, 2023 +# List of authors for montytest, as of August 17, 2024 Gary Linscott (glinscott) +Viren (Viren) Andy Pilate (Cubox) Bojun Guo (noobpwnftw) @@ -14,6 +15,7 @@ Fanael Linithien (Fanael) Fauzi Akram Dabat (FauziAkram) FieryDragonLord Gabe (MrBrain295) +Gahtan Nahdi (gahtan-syarif) Giacomo Lorenzetti (G-Lorenz) Gian-Carlo Pascutto (gcp) Henri Wiechers (hwiechers) @@ -55,8 +57,7 @@ Vince Negri (cuddlestmonkey) Werner Fenchel (wfenchel) Thanks to: -Ilari Pihlajisto - https://github.com/cutechess/cutechess, which drives all the games! -Tord Romstad - for Glaurung +Disservin - https://github.com/Disservin/fastchess, which drives all the games! Dariusz Orzechowski - opening book idea (perft generation + position score filtering) Stefan Pohl - Unbalanced Human Openings books https://flag-sprites.com - for the great flag sprites diff --git a/server/montytest/api.py b/server/montytest/api.py index 47f5258a..10768ef2 100644 --- a/server/montytest/api.py +++ b/server/montytest/api.py @@ -34,7 +34,7 @@ according to the route/URL mapping defined in `__init__.py`. 
""" -WORKER_VERSION = 14 +WORKER_VERSION = 20 @exception_view_config(HTTPException) diff --git a/server/montytest/rundb.py b/server/montytest/rundb.py index 542dba30..9b7a2c24 100644 --- a/server/montytest/rundb.py +++ b/server/montytest/rundb.py @@ -1235,23 +1235,6 @@ def priority(run): # lower is better if not have_binary: continue - # To avoid time losses in the case of large concurrency and short TC, - # probably due to cutechess-cli as discussed in issue #822, - # assign linux workers to LTC or multi-threaded jobs - # and windows workers only to LTC jobs - if max_threads > 32: - if "windows" in worker_info["uname"].lower(): - tc_too_short = get_tc_ratio(run["args"]["tc"], base="55+0.5") < 1.0 - else: - tc_too_short = ( - get_tc_ratio( - run["args"]["tc"], run["args"]["threads"], "35+0.3" - ) - < 1.0 - ) - if tc_too_short: - continue - # Limit the number of cores. # Currently this is only done for spsa. if "spsa" in run["args"]: diff --git a/worker/games.py b/worker/games.py index fc1edae0..73c512e9 100644 --- a/worker/games.py +++ b/worker/games.py @@ -67,7 +67,7 @@ def is_64bit(): HTTP_TIMEOUT = 30.0 -CUTECHESS_KILL_TIMEOUT = 15.0 +FASTCHESS_KILL_TIMEOUT = 15.0 UPDATE_RETRY_TIME = 15.0 RAWCONTENT_HOST = "https://raw.githubusercontent.com" @@ -473,24 +473,6 @@ def unzip(blob, save_dir): return file_list -def convert_book_move_counters(book_file): - # converts files with complete FENs, leaving others (incl. 
converted ones) unchanged - epds = [] - with open(book_file, "r") as file: - for fen in file: - fields = fen.split() - if len(fields) == 6 and fields[4].isdigit() and fields[5].isdigit(): - fields[4] = f"hmvc {fields[4]};" - fields[5] = f"fmvn {fields[5]};" - epds.append(" ".join(fields)) - else: - return - - with open(book_file, "w") as file: - for epd in epds: - file.write(epd + "\n") - - def setup_engine( destination, worker_dir, @@ -627,89 +609,7 @@ def enqueue_output(stream, queue): queue.put(line) -def update_pentanomial(line, rounds): - saved_rounds = copy.deepcopy(rounds) - saved_line = line - - def result_to_score(_result): - if _result == "1-0": - return 2 - elif _result == "0-1": - return 0 - elif _result == "1/2-1/2": - return 1 - else: - return -1 - - if "pentanomial" not in rounds.keys(): - rounds["pentanomial"] = 5 * [0] - if "trinomial" not in rounds.keys(): - rounds["trinomial"] = 3 * [0] - - saved_sum_trinomial = sum(rounds["trinomial"]) - current = {} - - # Parse line like this: - # Finished game 4 (Base-SHA vs New-SHA): 1/2-1/2 {Draw by adjudication} - line = line.split() - if line[0] == "Finished" and line[1] == "game" and len(line) >= 7: - round_ = int(line[2]) - rounds[round_] = current - current["white"] = line[3][1:] - current["black"] = line[5][:-2] - i = current["result"] = result_to_score(line[6]) - if round_ % 2 == 0: - if i != -1: - rounds["trinomial"][2 - i] += 1 # reversed colors - odd = round_ - 1 - even = round_ - else: - if i != -1: - rounds["trinomial"][i] += 1 - odd = round_ - even = round_ + 1 - if odd in rounds.keys() and even in rounds.keys(): - assert rounds[odd]["white"][0:3] == "New" - assert rounds[odd]["white"] == rounds[even]["black"] - assert rounds[odd]["black"] == rounds[even]["white"] - i = rounds[odd]["result"] - j = rounds[even]["result"] # even is reversed colors - if i != -1 and j != -1: - rounds["pentanomial"][i + 2 - j] += 1 - del rounds[odd] - del rounds[even] - rounds["trinomial"][i] -= 1 - 
rounds["trinomial"][2 - j] -= 1 - assert rounds["trinomial"][i] >= 0 - assert rounds["trinomial"][2 - j] >= 0 - - # make sure something happened, but not too much - # this sometimes fails: we want to understand why - assertion = ( - current.get("result", -1000) == -1 - or abs(sum(rounds["trinomial"]) - saved_sum_trinomial) == 1 - ) - if not assertion: - raise WorkerException( - "update_pentanomial() failed. line={}; rounds before={}; rounds after={}".format( - saved_line, saved_rounds, rounds - ) - ) - - -def validate_pentanomial(wld, rounds): - def results_to_score(results): - return sum([results[i] * (i / 2.0) for i in range(len(results))]) - - LDW = [wld[1], wld[2], wld[0]] - s3 = results_to_score(LDW) - s5 = results_to_score(rounds["pentanomial"]) + results_to_score(rounds["trinomial"]) - assert sum(LDW) == 2 * sum(rounds["pentanomial"]) + sum(rounds["trinomial"]) - epsilon = 1e-4 - assert abs(s5 - s3) < epsilon - - -def parse_cutechess_output( +def parse_fastchess_output( p, current_state, remote, result, spsa_tuning, games_to_play, batch_size, tc_limit ): hash_pattern = re.compile(r"(Base|New)-[a-f0-9]+") @@ -719,7 +619,23 @@ def shorten_hash(match): return "-".join([word[0], word[1][:10]]) saved_stats = copy.deepcopy(result["stats"]) - rounds = {} + + # patterns used to obtain fastchess WLD and ptnml results from the following block of info: + # -------------------------------------------------- + # Results of New-e443b2459e vs Base-e443b2459e (0.601+0.006, 1t, 16MB, UHO_Lichess_4852_v1.epd): + # Elo: -9.20 +/- 20.93, nElo: -11.50 +/- 26.11 + # LOS: 19.41 %, DrawRatio: 42.35 %, PairsRatio: 0.88 + # Games: 680, Wins: 248, Losses: 266, Draws: 166, Points: 331.0 (48.68 %) + # Ptnml(0-2): [43, 61, 144, 55, 37], WL/DD Ratio: 4.76 + # -------------------------------------------------- + pattern_WLD = re.compile( + r"Games: ([0-9]+), Wins: ([0-9]+), Losses: ([0-9]+), Draws: ([0-9]+), Points: ([0-9.]+) \(" + ) + pattern_ptnml = re.compile( + r"Ptnml\(0-2\): 
\[([0-9]+), ([0-9]+), ([0-9]+), ([0-9]+), ([0-9]+)\]" + ) + fastchess_WLD_results = None + fastchess_ptnml_results = None q = Queue() t_output = threading.Thread(target=enqueue_output, args=(p.stdout, q), daemon=True) @@ -737,7 +653,7 @@ def shorten_hash(match): except Empty: if p.poll() is not None: break - time.sleep(1) + time.sleep(0.1) continue line = hash_pattern.sub(shorten_hash, line) @@ -748,18 +664,22 @@ def shorten_hash(match): if num_games_updated == games_to_play: print("Finished match cleanly") else: - raise WorkerException("Finished match uncleanly") + raise WorkerException( + "Finished match uncleanly {} vs. required {}".format( + num_games_updated, games_to_play + ) + ) # Parse line like this: # Warning: New-SHA doesn't have option ThreatBySafePawn if "Warning:" in line and "doesn't have option" in line: - message = r'Cutechess-cli says: "{}"'.format(line) + message = r'fastchess says: "{}"'.format(line) raise RunException(message) # Parse line like this: # Warning: Invalid value for option P: -354 if "Warning:" in line and "Invalid value" in line: - message = r'Cutechess-cli says: "{}"'.format(line) + message = r'fastchess says: "{}"'.format(line) raise RunException(message) # Parse line like this: @@ -770,48 +690,57 @@ def shorten_hash(match): if "on time" in line: result["stats"]["time_losses"] += 1 - # Parse line like this: - # Score of monty vs base: 0 - 0 - 1 [0.500] 1 - if "Score" in line: - # Parsing sometimes fails. We want to understand why. 
+ # fastchess WLD and pentanomial output parsing + m = pattern_WLD.search(line) + if m: try: - chunks = line.split(":") - chunks = chunks[1].split() - wld = [int(chunks[0]), int(chunks[2]), int(chunks[4])] - except: - raise WorkerException("Failed to parse score line: {}".format(line)) + fastchess_WLD_results = { + "games": int(m.group(1)), + "wins": int(m.group(2)), + "losses": int(m.group(3)), + "draws": int(m.group(4)), + "points": float(m.group(5)), + } + except Exception as e: + raise WorkerException( + "Failed to parse WLD line: {} leading to: {}".format(line, str(e)) + ) - validate_pentanomial( - wld, rounds - ) # check if cutechess-cli result is compatible with - # our own bookkeeping + m = pattern_ptnml.search(line) + if m: + try: + fastchess_ptnml_results = [int(m.group(i)) for i in range(1, 6)] + except Exception as e: + raise WorkerException( + "Failed to parse ptnml line: {} leading to: {}".format(line, str(e)) + ) - pentanomial = [ - rounds["pentanomial"][i] + saved_stats["pentanomial"][i] + # if we have parsed the block properly let's update results + if (fastchess_ptnml_results is not None) and ( + fastchess_WLD_results is not None + ): + result["stats"]["pentanomial"] = [ + fastchess_ptnml_results[i] + saved_stats["pentanomial"][i] for i in range(5) ] - result["stats"]["pentanomial"] = pentanomial - - wld_pairs = {} # trinomial frequencies of completed game pairs - # rounds['trinomial'] is ordered ldw - wld_pairs["wins"] = wld[0] - rounds["trinomial"][2] - wld_pairs["losses"] = wld[1] - rounds["trinomial"][0] - wld_pairs["draws"] = wld[2] - rounds["trinomial"][1] - - result["stats"]["wins"] = wld_pairs["wins"] + saved_stats["wins"] - result["stats"]["losses"] = wld_pairs["losses"] + saved_stats["losses"] - result["stats"]["draws"] = wld_pairs["draws"] + saved_stats["draws"] + result["stats"]["wins"] = ( + fastchess_WLD_results["wins"] + saved_stats["wins"] + ) + result["stats"]["losses"] = ( + fastchess_WLD_results["losses"] + saved_stats["losses"] 
+ ) + result["stats"]["draws"] = ( + fastchess_WLD_results["draws"] + saved_stats["draws"] + ) if spsa_tuning: spsa = result["spsa"] - spsa["wins"] = wld_pairs["wins"] - spsa["losses"] = wld_pairs["losses"] - spsa["draws"] = wld_pairs["draws"] + spsa["wins"] = fastchess_WLD_results["wins"] + spsa["losses"] = fastchess_WLD_results["losses"] + spsa["draws"] = fastchess_WLD_results["draws"] - num_games_finished = ( - wld_pairs["wins"] + wld_pairs["losses"] + wld_pairs["draws"] - ) + num_games_finished = fastchess_WLD_results["games"] assert ( 2 * sum(result["stats"]["pentanomial"]) @@ -819,10 +748,13 @@ def shorten_hash(match): + result["stats"]["losses"] + result["stats"]["draws"] ) - assert num_games_finished == 2 * sum(rounds["pentanomial"]) + assert num_games_finished == 2 * sum(fastchess_ptnml_results) assert num_games_finished <= num_games_updated + batch_size assert num_games_finished <= games_to_play + fastchess_ptnml_results = None + fastchess_WLD_results = None + # Send an update_task request after a batch is full or if we have played all games. if (num_games_finished == num_games_updated + batch_size) or ( num_games_finished == games_to_play @@ -862,10 +794,6 @@ def shorten_hash(match): else: current_state["last_updated"] = datetime.now(timezone.utc) - # Act on line like this: - # Finished game 4 (Base-SHA vs New-SHA): 1/2-1/2 {Draw by adjudication} - if line.startswith("Finished game"): - update_pentanomial(line, rounds) else: raise WorkerException( "{} is past end time {}".format(datetime.now(timezone.utc), end_time) @@ -874,7 +802,7 @@ def shorten_hash(match): return True -def launch_cutechess( +def launch_fastchess( cmd, current_state, remote, result, spsa_tuning, games_to_play, batch_size, tc_limit ): if spsa_tuning: @@ -905,7 +833,7 @@ def launch_cutechess( w_params = [] b_params = [] - # Run cutechess-cli binary. + # Run fastchess-cli binary. 
# Stochastic rounding and probability for float N.p: (N, 1-p); (N+1, p) idx = cmd.index("_spsa_") cmd = ( @@ -930,7 +858,7 @@ def launch_cutechess( + cmd[idx + 1 :] ) - # print(cmd) + # print(cmd) try: with subprocess.Popen( cmd, @@ -952,7 +880,7 @@ def launch_cutechess( close_fds=not IS_WINDOWS, ) as p: try: - task_alive = parse_cutechess_output( + task_alive = parse_fastchess_output( p, current_state, remote, @@ -963,15 +891,15 @@ def launch_cutechess( tc_limit, ) finally: - # We nicely ask cutechess-cli to stop. + # We nicely ask fastchess to stop. try: send_sigint(p) except Exception as e: print("\nException in send_sigint:\n", e, sep="", file=sys.stderr) # now wait... - print("\nWaiting for cutechess-cli to finish ... ", end="", flush=True) + print("\nWaiting for fastchess to finish ... ", end="", flush=True) try: - p.wait(timeout=CUTECHESS_KILL_TIMEOUT) + p.wait(timeout=FASTCHESS_KILL_TIMEOUT) except subprocess.TimeoutExpired: print("timeout", flush=True) kill_process(p) @@ -979,12 +907,12 @@ def launch_cutechess( print("done", flush=True) except (OSError, subprocess.SubprocessError) as e: print( - "Exception starting cutechess:\n", + "Exception starting fastchess:\n", e, sep="", file=sys.stderr, ) - raise WorkerException("Unable to start cutechess. Error: {}".format(str(e))) + raise WorkerException("Unable to start fastchess. Error: {}".format(str(e))) return task_alive @@ -1000,7 +928,7 @@ def run_games( clear_binaries, global_cache, ): - # This is the main cutechess-cli driver. + # This is the main fastchess driver. # It is ok, and even expected, for this function to # raise exceptions, implicitly or explicitly, if a # task cannot be completed. @@ -1081,9 +1009,9 @@ def run_games( if "start" in task: print("Variable task sizes used. 
Opening offset = {}".format(opening_offset)) start_game_index = opening_offset + input_total_games - run_seed = int(hashlib.sha1(run["_id"].encode("utf-8")).hexdigest(), 16) % (2**30) + run_seed = int(hashlib.sha1(run["_id"].encode("utf-8")).hexdigest(), 16) % (2**64) - # Format options according to cutechess syntax. + # Format options according to fastchess syntax. def parse_options(s): results = [] chunks = s.split("=") @@ -1166,11 +1094,6 @@ def parse_options(s): blob = download_from_github(zipball) unzip(blob, testing_dir) - # convert .epd containing FENs into .epd containing EPDs with move counters - # only needed as long as cutechess-cli is the game manager - if book.endswith(".epd"): - convert_book_move_counters(testing_dir / book) - # Clean up the old networks (keeping the num_bkps most recent) num_bkps = 10 for old_net in sorted( @@ -1306,11 +1229,11 @@ def make_player(arg): if any(substring in book.upper() for substring in ["FRC", "960"]): variant = "fischerandom" - # Run cutechess binary. - cutechess = "cutechess-cli" + EXE_SUFFIX + # Run fastchess binary. 
+ fastchess = "fastchess" + EXE_SUFFIX cmd = ( [ - os.path.join(testing_dir, cutechess), + os.path.join(testing_dir, fastchess), "-recover", "-repeat", "-games", @@ -1320,6 +1243,16 @@ def make_player(arg): "-tournament", "gauntlet", ] + + [ + "-ratinginterval", + "1", + "-scoreinterval", + "1", + "-autosaveinterval", + "0", + "-report", + "penta=true", + ] + pgnout + ["-site", "https://montychess.org/tests/view/" + run["_id"]] + [ @@ -1372,7 +1305,7 @@ def make_player(arg): + book_cmd ) - task_alive = launch_cutechess( + task_alive = launch_fastchess( cmd, current_state, remote, @@ -1520,11 +1453,6 @@ def run_datagen_games( blob = download_from_github(zipball) unzip(blob, testing_dir) - # convert .epd containing FENs into .epd containing EPDs with move counters - # only needed as long as cutechess-cli is the game manager - if book.endswith(".epd"): - convert_book_move_counters(testing_dir / book) - # Verify that the signatures are correct. run_errors = [] try: @@ -1606,7 +1534,7 @@ def run_datagen_games( # now wait... print("\nWaiting for datagen to finish ... 
", end="", flush=True) try: - p.wait(timeout=CUTECHESS_KILL_TIMEOUT) + p.wait(timeout=FASTCHESS_KILL_TIMEOUT) except subprocess.TimeoutExpired: print("timeout", flush=True) kill_process(p) diff --git a/worker/sri.txt b/worker/sri.txt index 34bdc3d2..e84c8d3a 100644 --- a/worker/sri.txt +++ b/worker/sri.txt @@ -1 +1 @@ -{"__version": 14, "updater.py": "gSJX/HbsPwsZnUZaFbAgF0zwWPWXv+LEw9jBhJkxFOrCH9CZS0+4U4nE2fJdeNze", "worker.py": "StfmRgS7N/I5sXJtWsmQ9ffLtY+JJJv5nr291dWIjfA+WeJiMAVpcNUc/GN2k0Zt", "games.py": "/IKLOrxmOTyK+T4WzMdHzNvFzm4cJPzM8LeQ5sHyidDdrwOq6VTXkpIUSoENfZTE"} +{"__version": 20, "updater.py": "gSJX/HbsPwsZnUZaFbAgF0zwWPWXv+LEw9jBhJkxFOrCH9CZS0+4U4nE2fJdeNze", "worker.py": "VgVVa7f6bdHWJU/uKVcAGApcegP/fZwgV45PLSbfGTHUmc66F51hGbGBZZRvhRMM", "games.py": "jdCHJDCixQ9Fa138HXoUcOvvHEAHO7BWq/3XUmr0GYHQ/zlRYyjuSueEQPxkMTg9"} diff --git a/worker/tests/test_worker.py b/worker/tests/test_worker.py index 23c81401..7c94f6f3 100644 --- a/worker/tests/test_worker.py +++ b/worker/tests/test_worker.py @@ -70,8 +70,12 @@ def test_sri(self): def test_toolchain_verification(self): self.assertTrue(worker.verify_toolchain()) - def test_setup_cutechess(self): - self.assertTrue(worker.setup_cutechess(Path.cwd())) + def test_setup_fastchess(self): + self.assertTrue( + worker.setup_fastchess( + Path.cwd(), list(worker.detect_compilers().keys())[0], 4, "" + ) + ) if __name__ == "__main__": diff --git a/worker/worker.py b/worker/worker.py index 1e390161..70731f06 100644 --- a/worker/worker.py +++ b/worker/worker.py @@ -16,6 +16,7 @@ import stat import subprocess import sys +import tempfile import threading import time import traceback @@ -41,9 +42,12 @@ RunException, WorkerException, backup_log, + cache_read, + cache_write, download_from_github, format_return_code, log, + requests_get, run_games, send_api_post_request, str_signal, @@ -65,7 +69,7 @@ MIN_CARGO_MAJOR = 1 MIN_CARGO_MINOR = 77 -WORKER_VERSION = 14 +WORKER_VERSION = 20 FILE_LIST = ["updater.py", "worker.py", 
"games.py"] HTTP_TIMEOUT = 30.0 INITIAL_RETRY_TIME = 15.0 @@ -100,8 +104,8 @@ worker.py : worker() worker.py : fetch_and_handle_task() [in loop] games.py : run_games() -games.py : launch_cutechess() [in loop for spsa] -games.py : parse_cutechess_output() +games.py : launch_fastchess() [in loop for spsa] +games.py : parse_fastchess_output() Apis used by the worker ======================= @@ -117,7 +121,7 @@ /api/request_task POST /api/nn/ GET /git/trees/master GET - /git/trees/master/blobs/ GET + /repos/Disservin/fastchess/zipball/ GET /git/trees/master/blobs/ GET /repos//zipball/ GET @@ -389,40 +393,17 @@ def get_credentials(config, options, args): return username, password -def download_cutechess(cutechess, save_dir): - if len(EXE_SUFFIX) > 0: - zipball = "cutechess-cli-win.zip" - elif IS_MACOS: - zipball = "cutechess-cli-macos-64bit.zip" - else: - zipball = "cutechess-cli-linux-{}.zip".format(platform.architecture()[0]) - try: - blob = download_from_github(zipball) - unzip(blob, save_dir) - - os.chmod(cutechess, os.stat(cutechess).st_mode | stat.S_IEXEC) - except Exception as e: - print( - "Exception downloading or extracting {}:\n".format(zipball), - e, - sep="", - file=sys.stderr, - ) - else: - print("Finished downloading {}".format(cutechess)) - +def verify_required_fastchess(fastchess_path, fastchess_sha): + # Verify that fastchess is working and has the required minimum version. -def verify_required_cutechess(cutechess_path): - # Verify that cutechess is working and has the required minimum version. 
- - if not cutechess_path.exists(): + if not fastchess_path.exists(): return False - print("Obtaining version info for {} ...".format(cutechess_path)) + print("Obtaining version info for {} ...".format(fastchess_path)) try: with subprocess.Popen( - [cutechess_path, "--version"], + [fastchess_path, "--version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True, @@ -430,98 +411,124 @@ def verify_required_cutechess(cutechess_path): close_fds=not IS_WINDOWS, ) as p: errors = p.stderr.read() - pattern = re.compile(r"cutechess-cli ([0-9]+)\.([0-9]+)\.([0-9]+)") - major, minor, patch = 0, 0, 0 + pattern = re.compile( + r"fastchess alpha [0-9]*.[0-9]*.[0-9]* [0-9]*-([0-9a-f-]*)$" + ) + short_sha = "" for line in iter(p.stdout.readline, ""): m = pattern.search(line) if m: print("Found", line.strip()) - major = int(m.group(1)) - minor = int(m.group(2)) - patch = int(m.group(3)) + short_sha = m.group(1) except (OSError, subprocess.SubprocessError) as e: - print("Unable to run cutechess-cli. Error: {}".format(str(e))) + print("Unable to run fastchess. Error: {}".format(str(e))) return False if p.returncode != 0: print( - "Unable to run cutechess-cli. Return code: {}. Error: {}".format( + "Unable to run fastchess. Return code: {}. Error: {}".format( format_return_code(p.returncode), errors ) ) return False - if major + minor + patch == 0: - print("Unable to find the version of cutechess-cli.") + if len(short_sha) < 7: + print( + "Unable to find a suitable sha of length 7 or more in the fastchess version." + ) return False - if (major, minor) < (1, 2): - print("Requires cutechess 1.2 or higher, found version doesn't match") + if not fastchess_sha.startswith(short_sha): + print( + "fastchess sha {} required but the version shows {}".format( + fastchess_sha, short_sha + ) + ) return False return True -def setup_cutechess(worker_dir): +def setup_fastchess(worker_dir, compiler, concurrency, global_cache): # Create the testing directory if missing. 
testing_dir = worker_dir / "testing" testing_dir.mkdir(exist_ok=True) - curr_dir = Path.cwd() + fastchess_sha = "3564c85837060d9a39baac2257e6e43456de84df" + username = "Disservin" + + fastchess = "fastchess" + EXE_SUFFIX + if verify_required_fastchess(testing_dir / fastchess, fastchess_sha): + return True + # build it ourselves try: - os.chdir(testing_dir) - except Exception as e: - print("Unable to enter {}. Error: {}".format(testing_dir, str(e))) - return False + item_url = ( + "https://api.github.com/repos/" + + username + + "/fastchess/zipball/" + + fastchess_sha + ) - cutechess = "cutechess-cli" + EXE_SUFFIX - cutechess_path = testing_dir / cutechess + print("Building fastchess from sources at {}".format(item_url)) - # Download cutechess-cli if missing or overwrite if there are issues. - if not verify_required_cutechess(cutechess_path): - download_cutechess(cutechess, testing_dir) - else: - os.chdir(curr_dir) - return True + should_cache = False + blob = cache_read(global_cache, fastchess_sha + ".zip") - ret = True + if blob is None: + print("Downloading {}".format(item_url)) + blob = requests_get(item_url).content + should_cache = True + else: + print("Using {} from global cache".format(fastchess_sha + ".zip")) - if not verify_required_cutechess(cutechess_path): - print( - "The downloaded cutechess-cli is not working. Trying to restore a backup copy ..." - ) - bkp_cutechess_clis = sorted( - worker_dir.glob("_testing_*/" + cutechess), - key=os.path.getctime, - reverse=True, - ) - if bkp_cutechess_clis: - bkp_cutechess_cli = bkp_cutechess_clis[0] - try: - shutil.copy(bkp_cutechess_cli, testing_dir) - except Exception as e: - print( - "Unable to copy {} to {}. 
Error: {}".format( - bkp_cutechess_cli, testing_dir, str(e) - ) - ) + tmp_dir = Path(tempfile.mkdtemp(dir=testing_dir)) + file_list = unzip(blob, tmp_dir) + prefix = os.path.commonprefix([n.filename for n in file_list]) - if not verify_required_cutechess(cutechess_path): - print( - "The backup copy {} doesn't work either ...".format( - bkp_cutechess_cli - ) + if should_cache: + cache_write(global_cache, fastchess_sha + ".zip", blob) + + cd = os.getcwd() + os.chdir(tmp_dir / prefix) + + cmds = [ + f"make -j{concurrency} tests CXX=g++ GIT_SHA={fastchess_sha[0:8]} GIT_DATE=01010101", + str(tmp_dir / prefix / ("fastchess-tests" + EXE_SUFFIX)), + "make clean", + f"make -j{concurrency} CXX=g++ GIT_SHA={fastchess_sha[0:8]} GIT_DATE=01010101", + ] + + for cmd in cmds: + print(cmd) + with subprocess.Popen( + cmd, + shell=True, + env=os.environ, + stderr=subprocess.PIPE, + universal_newlines=True, + bufsize=1, + close_fds=not IS_WINDOWS, + ) as p: + errors = p.stderr.readlines() + + if p.returncode: + raise WorkerException( + "Executing {} failed. 
Error: {}".format(cmd, errors) ) - print("No suitable cutechess-cli found") - ret = False - else: - print("No backup copy found") - print("No suitable cutechess-cli found") - ret = False + shutil.copy("fastchess" + EXE_SUFFIX, testing_dir) + os.chdir(cd) + shutil.rmtree(tmp_dir) - os.chdir(curr_dir) - return ret + except Exception as e: + print( + "Exception downloading, extracting or building fastchess:\n", + e, + sep="", + file=sys.stderr, + ) + + return verify_required_fastchess(testing_dir / fastchess, fastchess_sha) def validate(config, schema): @@ -818,7 +825,7 @@ def my_error(e): # Limit concurrency so that at least STC tests can run with the evailable memory # The memory need per engine is 16 for the TT Hash, 10 for the process 138 for the net and 16 per thread - # 60 is the need for cutechess-cli + # 60 is the need for fastchess-cli # These numbers need to be up-to-date with the server values STC_memory = 2 * (16 + 10 + 138 + 16) max_concurrency = int((options.max_memory - 60) / STC_memory) @@ -1477,12 +1484,21 @@ def worker(): print("Exception verifying worker version:\n", e, sep="", file=sys.stderr) return 1 + # Assemble the config/options data as well as some other data in a + # "worker_info" dictionary. + # This data will be sent to the server when a new task is requested. + + compiler, major, minor, patchlevel = options.compiler + print("Using {} {}.{}.{}".format(compiler, major, minor, patchlevel)) + # Check for common tool chain issues if not verify_toolchain(): return 1 - # Make sure we have a working cutechess-cli - if not setup_cutechess(worker_dir): + # Make sure we have a working fastchess + if not setup_fastchess( + worker_dir, compiler, options.concurrency, options.global_cache + ): return 1 # Check if we are running an unmodified worker @@ -1490,13 +1506,6 @@ def worker(): if unmodified is None: return 1 - # Assemble the config/options data as well as some other data in a - # "worker_info" dictionary. 
- # This data will be sent to the server when a new task is requested. - - compiler, major, minor, patchlevel = options.compiler - print("Using {} {}.{}.{}".format(compiler, major, minor, patchlevel)) - try: brand = cpuinfo.get_cpu_info()["brand_raw"] except: