From cc59aa697bf3e3e6ab9243d51f3e11b8b9e54167 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Hundeb=C3=B8ll?= Date: Fri, 17 May 2024 10:13:28 +0200 Subject: [PATCH] Implement GNU Make 4.4+ jobserver fifo / semaphore client support The principle of such a job server is rather simple: Before starting a new job (edge in ninja-speak), a token must be acquired from an external entity. On posix systems, that entity is simply a fifo filled with N characters. On win32 systems it is a semaphore initialized to N. Once a job is finished, the token must be returned to the external entity. This functionality is desired when ninja is used as part of a bigger build, such as builds with Yocto/OpenEmbedded, Buildroot and Android. Here, multiple compile jobs are executed in parallel to maximize cpu utilization, but if each compile job uses all available cores, the system is over loaded. --- CMakeLists.txt | 7 ++- configure.py | 3 ++ src/build.cc | 28 ++++++++--- src/build.h | 9 +++- src/build_test.cc | 7 ++- src/jobserver-posix.cc | 69 +++++++++++++++++++++++++++ src/jobserver-win32.cc | 58 +++++++++++++++++++++++ src/jobserver.cc | 94 ++++++++++++++++++++++++++++++++++++ src/jobserver.h | 105 +++++++++++++++++++++++++++++++++++++++++ 9 files changed, 370 insertions(+), 10 deletions(-) create mode 100644 src/jobserver-posix.cc create mode 100644 src/jobserver-win32.cc create mode 100644 src/jobserver.cc create mode 100644 src/jobserver.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b8fdee7d3a..0032f515d4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -138,6 +138,7 @@ add_library(libninja OBJECT src/eval_env.cc src/graph.cc src/graphviz.cc + src/jobserver.cc src/json.cc src/line_printer.cc src/manifest_parser.cc @@ -153,6 +154,7 @@ add_library(libninja OBJECT if(WIN32) target_sources(libninja PRIVATE src/subprocess-win32.cc + src/jobserver-win32.cc src/includes_normalize-win32.cc src/msvc_helper-win32.cc src/msvc_helper_main-win32.cc @@ -169,7 +171,10 @@ if(WIN32) # errors by telling windows.h to not define those two. add_compile_definitions(NOMINMAX) else() - target_sources(libninja PRIVATE src/subprocess-posix.cc) + target_sources(libninja PRIVATE + src/subprocess-posix.cc + src/jobserver-posix.cc + ) if(CMAKE_SYSTEM_NAME STREQUAL "OS400" OR CMAKE_SYSTEM_NAME STREQUAL "AIX") target_sources(libninja PRIVATE src/getopt.c) # Build getopt.c, which can be compiled as either C or C++, as C++ diff --git a/configure.py b/configure.py index c88daad508..1a12e5c0c8 100755 --- a/configure.py +++ b/configure.py @@ -542,6 +542,7 @@ def has_re2c() -> bool: 'eval_env', 'graph', 'graphviz', + 'jobserver', 'json', 'line_printer', 'manifest_parser', @@ -556,6 +557,7 @@ def has_re2c() -> bool: objs += cxx(name, variables=cxxvariables) if platform.is_windows(): for name in ['subprocess-win32', + 'jobserver-win32', 'includes_normalize-win32', 'msvc_helper-win32', 'msvc_helper_main-win32']: @@ -565,6 +567,7 @@ def has_re2c() -> bool: objs += cc('getopt') else: objs += cxx('subprocess-posix') + objs += cxx('jobserver-posix') if platform.is_aix(): objs += cc('getopt') if platform.is_msvc(): diff --git a/src/build.cc b/src/build.cc index deb8f04c8b..7875ef3066 100644 --- a/src/build.cc +++ b/src/build.cc @@ -51,7 +51,7 @@ struct DryRunCommandRunner : public CommandRunner { virtual ~DryRunCommandRunner() {} // Overridden from CommandRunner: - virtual size_t CanRunMore() const; + virtual size_t CanRunMore(bool jobserver_enabled) const; virtual bool StartCommand(Edge* edge); virtual bool WaitForCommand(Result* result); @@ -59,7 +59,7 @@ struct DryRunCommandRunner : public CommandRunner { queue finished_; }; -size_t DryRunCommandRunner::CanRunMore() const { +size_t DryRunCommandRunner::CanRunMore(bool jobserver_enabled) const { return SIZE_MAX; } @@ -164,6 +164,11 @@ Edge* Plan::FindWork() { if (ready_.empty()) return NULL; + // Don't initiate more work if the jobserver cannot acquire more tokens + if (jobserver_.Enabled() && !jobserver_.Acquire()) { + return NULL; + } + Edge* work = ready_.top(); ready_.pop(); return work; @@ -201,6 +206,11 @@ bool Plan::EdgeFinished(Edge* edge, EdgeResult result, string* err) { edge->pool()->EdgeFinished(*edge); edge->pool()->RetrieveReadyEdges(&ready_); + // Return the token for acquired for this very edge to the jobserver + if (jobserver_.Enabled()) { + jobserver_.Release(); + } + // The rest of this function only applies to successful commands. if (result != kEdgeSucceeded) return true; @@ -579,6 +589,7 @@ void Plan::ScheduleInitialEdges() { } void Plan::PrepareQueue() { + jobserver_.Init(); ComputeCriticalPath(); ScheduleInitialEdges(); } @@ -596,7 +607,7 @@ void Plan::Dump() const { struct RealCommandRunner : public CommandRunner { explicit RealCommandRunner(const BuildConfig& config) : config_(config) {} virtual ~RealCommandRunner() {} - virtual size_t CanRunMore() const; + virtual size_t CanRunMore(bool jobserver_enabled) const; virtual bool StartCommand(Edge* edge); virtual bool WaitForCommand(Result* result); virtual vector GetActiveEdges(); @@ -619,12 +630,17 @@ void RealCommandRunner::Abort() { subprocs_.Clear(); } -size_t RealCommandRunner::CanRunMore() const { +size_t RealCommandRunner::CanRunMore(bool jobserver_enabled) const { size_t subproc_number = subprocs_.running_.size() + subprocs_.finished_.size(); int64_t capacity = config_.parallelism - subproc_number; + // Return "infinite" capacity if a jobserver is used to limit the number + // of parallel subprocesses instead. + if (jobserver_enabled) + return SIZE_MAX; + if (config_.max_load_average > 0.0f) { int load_capacity = config_.max_load_average - GetLoadAverage(); if (load_capacity < capacity) @@ -792,7 +808,7 @@ bool Builder::Build(string* err) { while (plan_.more_to_do()) { // See if we can start any more commands. if (failures_allowed) { - size_t capacity = command_runner_->CanRunMore(); + size_t capacity = command_runner_->CanRunMore(plan_.JobserverEnabled()); while (capacity > 0) { Edge* edge = plan_.FindWork(); if (!edge) @@ -820,7 +836,7 @@ bool Builder::Build(string* err) { --capacity; // Re-evaluate capacity. - size_t current_capacity = command_runner_->CanRunMore(); + size_t current_capacity = command_runner_->CanRunMore(plan_.JobserverEnabled()); if (current_capacity < capacity) capacity = current_capacity; } diff --git a/src/build.h b/src/build.h index 9bb0c70b5c..354df740b6 100644 --- a/src/build.h +++ b/src/build.h @@ -24,6 +24,7 @@ #include "depfile_parser.h" #include "exit_status.h" #include "graph.h" +#include "jobserver.h" #include "util.h" // int64_t struct BuildLog; @@ -52,6 +53,9 @@ struct Plan { /// Returns true if there's more work to be done. bool more_to_do() const { return wanted_edges_ > 0 && command_edges_ > 0; } + /// Jobserver status used to skip capacity based on load average + bool JobserverEnabled() const { return jobserver_.Enabled(); } + /// Dumps the current state of the plan. void Dump() const; @@ -139,6 +143,9 @@ struct Plan { /// Total remaining number of wanted edges. int wanted_edges_; + + /// Jobserver client + Jobserver jobserver_; }; /// CommandRunner is an interface that wraps running the build @@ -146,7 +153,7 @@ struct Plan { /// RealCommandRunner is an implementation that actually runs commands. struct CommandRunner { virtual ~CommandRunner() {} - virtual size_t CanRunMore() const = 0; + virtual size_t CanRunMore(bool jobserver_enabled) const = 0; virtual bool StartCommand(Edge* edge) = 0; /// The result of waiting for a command. diff --git a/src/build_test.cc b/src/build_test.cc index c84190a040..84d9ceed1e 100644 --- a/src/build_test.cc +++ b/src/build_test.cc @@ -521,7 +521,7 @@ struct FakeCommandRunner : public CommandRunner { max_active_edges_(1), fs_(fs) {} // CommandRunner impl - virtual size_t CanRunMore() const; + virtual size_t CanRunMore(bool jobserver_enabled) const; virtual bool StartCommand(Edge* edge); virtual bool WaitForCommand(Result* result); virtual vector GetActiveEdges(); @@ -622,10 +622,13 @@ void BuildTest::RebuildTarget(const string& target, const char* manifest, builder.command_runner_.release(); } -size_t FakeCommandRunner::CanRunMore() const { +size_t FakeCommandRunner::CanRunMore(bool jobserver_enabled) const { if (active_edges_.size() < max_active_edges_) return SIZE_MAX; + if (jobserver_enabled) + return SIZE_MAX; + return 0; } diff --git a/src/jobserver-posix.cc b/src/jobserver-posix.cc new file mode 100644 index 0000000000..f90bb44314 --- /dev/null +++ b/src/jobserver-posix.cc @@ -0,0 +1,69 @@ +// Copyright 2024 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "jobserver.h" +#include "util.h" + +void Jobserver::Init() { + assert(fd_ < 0); + + if (!ParseJobserverAuth("fifo")) { + return; + } + + const char *jobserver = jobserver_name_.c_str(); + + fd_ = open(jobserver, O_NONBLOCK | O_CLOEXEC | O_RDWR); + if (fd_ < 0) { + Fatal("failed to open jobserver: %s: %s", jobserver, strerror(errno)); + } + + Info("Using jobserver: %s", jobserver); +} + +Jobserver::~Jobserver() { + assert(token_count_ == 0); + + if (fd_ >= 0) { + close(fd_); + } +} + +bool Jobserver::Enabled() const { + return fd_ >= 0; +} + +bool Jobserver::AcquireToken() { + char token; + int res = read(fd_, &token, 1); + if (res < 0 && errno != EAGAIN && errno != EWOULDBLOCK) { + Fatal("failed to read jobserver token: %s", strerror(errno)); + } + + return res > 0; +} + +void Jobserver::ReleaseToken() { + char token = '+'; + int res = write(fd_, &token, 1); + if (res != 1) { + Fatal("failed to write token: %s: %s", jobserver_name_.c_str(), + strerror(errno)); + } +} diff --git a/src/jobserver-win32.cc b/src/jobserver-win32.cc new file mode 100644 index 0000000000..7e82d54e3f --- /dev/null +++ b/src/jobserver-win32.cc @@ -0,0 +1,58 @@ +// Copyright 2024 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "jobserver.h" +#include "util.h" + +void Jobserver::Init() { + assert(sem_ == INVALID_HANDLE_VALUE); + + if (!ParseJobserverAuth("sem")) { + return; + } + + const char *name = jobserver_name_.c_str(); + + sem_ = OpenSemaphore(SEMAPHORE_ALL_ACCESS, false, name); + if (sem_ == INVALID_HANDLE_VALUE) { + Win32Fatal("OpenSemaphore"); + } + + Info("using jobserver: %s", name); +} + +Jobserver::~Jobserver() { + assert(token_count_ == 0); + + if (sem_ != INVALID_HANDLE_VALUE) { + CloseHandle(sem_); + } +} + +bool Jobserver::Enabled() const { + return sem_ != INVALID_HANDLE_VALUE; +} + +bool Jobserver::AcquireToken() { + return WaitForSingleObject(sem_, 0) == WAIT_OBJECT_0; +} + +void Jobserver::ReleaseToken() { + if (ReleaseSemaphore(sem_, 1, NULL)) { + Win32Fatal("ReleaseSemaphore"); + } +} diff --git a/src/jobserver.cc b/src/jobserver.cc new file mode 100644 index 0000000000..4b501353a8 --- /dev/null +++ b/src/jobserver.cc @@ -0,0 +1,94 @@ +// Copyright 2024 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "jobserver.h" + +bool Jobserver::Acquire() { + // The first token is implicitly handed to the ninja process, so don't + // acquire it from the jobserver + if (token_count_ == 0 || AcquireToken()) { + token_count_++; + return true; + } + + return false; +} + +void Jobserver::Release() { + assert(token_count_ >= 1); + token_count_--; + + // Don't return first token to the jobserver, as it is implicitly handed + // to the ninja process + if (token_count_ > 0) { + ReleaseToken(); + } +} + +bool Jobserver::ParseJobserverAuth(const char *type) { + const char *jobserver_auth = "--jobserver-auth="; + const char *makeflags = getenv("MAKEFLAGS"); + + // Return early if no make flags are passed in the environment + if (makeflags == NULL) { + return false; + } + + // Search the make flags for a jobserver-auth argument + const char *str_begin = strstr(makeflags, jobserver_auth); + if (str_begin == NULL) { + return false; + } + + // Advance the string pointer to just past the = character + str_begin += strlen(jobserver_auth); + + // Find the length of the type value by searching for the following colon + const char *str_end = strchr(str_begin, ':'); + if (str_end == NULL) { + Warning("invalid --jobserver-auth value: '%s'", str_begin); + return false; + } + + // Ignore the argument if the length or the value of the type value doesn't + // match the requested type (i.e. "fifo" on posix or "sem" on windows). + if (strlen(type) != static_cast(str_end - str_begin) || + strncmp(str_begin, type, str_end - str_begin)) { + Warning("invalid jobserver type: got %.*s; expected %s", + str_end - str_begin, str_begin, type); + return false; + } + + // Advance the string pointer to just after the : character + str_begin = str_end + 1; + + // Find the length of the jobserver path/name value by searching for the + // following space or the end of the string. + str_end = strchr(str_begin, ' '); + if (str_end == NULL) { + jobserver_name_ = std::string(str_begin); + } else { + jobserver_name_ = std::string(str_begin, str_end - str_begin); + } + + if (jobserver_name_.empty()) { + Warning("invalid --jobserver-auth value: ''"); + return false; + } + + return true; +} diff --git a/src/jobserver.h b/src/jobserver.h new file mode 100644 index 0000000000..9174095b59 --- /dev/null +++ b/src/jobserver.h @@ -0,0 +1,105 @@ +// Copyright 2024 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#if defined( _WIN32) +#include +#endif + +#include +#include + +#include "util.h" + +/// Jobserver limits parallelism by acquiring tokens from an external +/// pool before running commands. On posix systems the pool is a fifo filled +/// with N characters. On windows systems the pool is a semaphore initialized +/// to N. When a command is finished, the acquired token is released by writing +/// it back to fifo / increasing the semaphore count. +/// +/// The jobserver functionality is enabled by passing +/// --jobserver-auth=: in the MAKEFLAGS environment variable. On +/// posix systems, is 'fifo' and is a path to a fifo. On windows +/// systems, is 'sem' and is the name of a semaphore. +/// +/// The class is used in build.cc by calling Init() to parse the MAKEFLAGS +/// argument and open the fifo / semaphore. Once enabled, Acquire() must be +/// called to try to acquire a token from the pool. If a token is acquired, a +/// new command can be started. Once the command is completed, Release() must +/// be called to return the token to the pool. +/// +/// Note that this class implements the jobserver functionality from GNU make +/// v4.4 and later. Older versions of make passes open pipe file descriptors +/// to sub-makes and specifies the file descriptor numbers using +/// --jobserver-auth=R,W in the MAKEFLAGS environment variable. The older pipe +/// method is deliberately not implemented here, as it is not as simple as the +/// fifo method. +struct Jobserver { + ~Jobserver(); + + /// Parse the MAKEFLAGS environment variable to receive the path / name of the + /// token pool, and open the handle to the pool. If a jobserver argument is + /// found in the MAKEFLAGS environment variable, and the handle is + /// successfully opened, subsequent calls to Enable() returns true. + /// If a jobserver argument is found, but the handle fails to be opened, the + /// ninja process is aborted with an error. + void Init(); + + /// Return true if jobserver functionality is enabled and initialized. + bool Enabled() const; + + /// Try to to acquire a token from the external token pool (without blocking). + /// Should be called every time Ninja needs to start a command process. + /// Return true on success (token acquired), and false on failure (no tokens + /// available). First call always succeeds. + bool Acquire(); + + /// Return a previously acquired token to the external token pool. Must be + /// called for any _successful_ call to Acquire(). Normally when a command + /// subprocess completes, or when Ninja itself exits, even in case of errors. + void Release(); + +private: + /// Parse the --jobserver-auth argument from the MAKEFLAGS environment + /// variable. Return true if the argument is found and correctly parsed. + /// Return false if the argument is not found, or fails to parse. + bool ParseJobserverAuth(const char *type); + + /// Implementation specific method to acquire a token from the external pool, + /// which is called for all but the first requested tokens. + bool AcquireToken(); + + /// Implementation specific method to release a token to the external pool, + /// which is called for all but the last released tokens. + void ReleaseToken(); + + /// Path to the fifo on posix systems or name of the semaphore on windows + /// systems. + std::string jobserver_name_; + + /// Number of currently acquired tokens. Used to know when the first (free) + /// token has been acquired / released, and to verify that all acquired tokens + /// have been released before exiting. + size_t token_count_ = 0; + +#ifdef _WIN32 + /// Handle to the opened semaphore used as external token pool. + HANDLE sem_ = INVALID_HANDLE_VALUE; +#else + /// Non-blocking file descriptor for the opened fifo used as the external + /// token pool. + int fd_ = -1; +#endif +};