Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add GNU make jobserver style "fifo" support #2263

Open
wants to merge 20 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ add_library(libninja OBJECT
src/state.cc
src/status.cc
src/string_piece_util.cc
src/tokenpool-gnu-make.cc
src/util.cc
src/version.cc
)
Expand All @@ -129,13 +130,17 @@ if(WIN32)
src/msvc_helper_main-win32.cc
src/getopt.c
src/minidump-win32.cc
src/tokenpool-gnu-make-win32.cc
)
# Build getopt.c, which can be compiled as either C or C++, as C++
# so that build environments which lack a C compiler, but have a C++
# compiler may build ninja.
set_source_files_properties(src/getopt.c PROPERTIES LANGUAGE CXX)
else()
target_sources(libninja PRIVATE src/subprocess-posix.cc)
target_sources(libninja PRIVATE
src/subprocess-posix.cc
src/tokenpool-gnu-make-posix.cc
)
if(CMAKE_SYSTEM_NAME STREQUAL "OS400" OR CMAKE_SYSTEM_NAME STREQUAL "AIX")
target_sources(libninja PRIVATE src/getopt.c)
# Build getopt.c, which can be compiled as either C or C++, as C++
Expand Down Expand Up @@ -224,6 +229,7 @@ if(BUILD_TESTING)
src/string_piece_util_test.cc
src/subprocess_test.cc
src/test.cc
src/tokenpool_test.cc
src/util_test.cc
)
if(WIN32)
Expand Down
7 changes: 6 additions & 1 deletion configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,11 +538,13 @@ def has_re2c():
'state',
'status',
'string_piece_util',
'tokenpool-gnu-make',
'util',
'version']:
objs += cxx(name, variables=cxxvariables)
if platform.is_windows():
for name in ['subprocess-win32',
'tokenpool-gnu-make-win32',
'includes_normalize-win32',
'msvc_helper-win32',
'msvc_helper_main-win32']:
Expand All @@ -551,7 +553,9 @@ def has_re2c():
objs += cxx('minidump-win32', variables=cxxvariables)
objs += cc('getopt')
else:
objs += cxx('subprocess-posix')
for name in ['subprocess-posix',
'tokenpool-gnu-make-posix']:
objs += cxx(name)
if platform.is_aix():
objs += cc('getopt')
if platform.is_msvc():
Expand Down Expand Up @@ -609,6 +613,7 @@ def has_re2c():
'string_piece_util_test',
'subprocess_test',
'test',
'tokenpool_test',
'util_test']:
objs += cxx(name, variables=cxxvariables)
if platform.is_windows():
Expand Down
131 changes: 98 additions & 33 deletions src/build.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "state.h"
#include "status.h"
#include "subprocess.h"
#include "tokenpool.h"
#include "util.h"

using namespace std;
Expand All @@ -47,8 +48,9 @@ struct DryRunCommandRunner : public CommandRunner {

// Overridden from CommandRunner:
virtual bool CanRunMore() const;
virtual bool AcquireToken();
virtual bool StartCommand(Edge* edge);
virtual bool WaitForCommand(Result* result);
virtual bool WaitForCommand(Result* result, bool more_ready);

private:
queue<Edge*> finished_;
Expand All @@ -58,12 +60,16 @@ bool DryRunCommandRunner::CanRunMore() const {
return true;
}

bool DryRunCommandRunner::AcquireToken() {
return true;
}

bool DryRunCommandRunner::StartCommand(Edge* edge) {
finished_.push(edge);
return true;
}

bool DryRunCommandRunner::WaitForCommand(Result* result) {
bool DryRunCommandRunner::WaitForCommand(Result* result, bool more_ready) {
if (finished_.empty())
return false;

Expand Down Expand Up @@ -149,7 +155,7 @@ void Plan::EdgeWanted(const Edge* edge) {
}

Edge* Plan::FindWork() {
if (ready_.empty())
if (!more_ready())
return NULL;
EdgeSet::iterator e = ready_.begin();
Edge* edge = *e;
Expand Down Expand Up @@ -448,19 +454,46 @@ void Plan::Dump() const {
}

struct RealCommandRunner : public CommandRunner {
explicit RealCommandRunner(const BuildConfig& config) : config_(config) {}
virtual ~RealCommandRunner() {}
explicit RealCommandRunner(const BuildConfig& config);
virtual ~RealCommandRunner();
virtual bool CanRunMore() const;
virtual bool AcquireToken();
virtual bool StartCommand(Edge* edge);
virtual bool WaitForCommand(Result* result);
virtual bool WaitForCommand(Result* result, bool more_ready);
virtual vector<Edge*> GetActiveEdges();
virtual void Abort();

const BuildConfig& config_;
// copy of config_.max_load_average; can be modified by TokenPool setup
double max_load_average_;
SubprocessSet subprocs_;
TokenPool* tokens_;
map<const Subprocess*, Edge*> subproc_to_edge_;
};

RealCommandRunner::RealCommandRunner(const BuildConfig& config) : config_(config) {
max_load_average_ = config.max_load_average;
if ((tokens_ = TokenPool::Get()) != NULL) {
bool setup_ok = config_.tokenpool_master ?
tokens_->SetupMaster(config_.verbosity == BuildConfig::VERBOSE,
config_.parallelism,
max_load_average_,
config_.tokenpool_master_style) :
tokens_->SetupClient(config_.parallelism_from_cmdline,
config_.verbosity == BuildConfig::VERBOSE,
max_load_average_);

if (!setup_ok) {
delete tokens_;
tokens_ = NULL;
}
}
}

RealCommandRunner::~RealCommandRunner() {
delete tokens_;
}

vector<Edge*> RealCommandRunner::GetActiveEdges() {
vector<Edge*> edges;
for (map<const Subprocess*, Edge*>::iterator e = subproc_to_edge_.begin();
Expand All @@ -471,34 +504,57 @@ vector<Edge*> RealCommandRunner::GetActiveEdges() {

void RealCommandRunner::Abort() {
subprocs_.Clear();
if (tokens_)
tokens_->Clear();
}

bool RealCommandRunner::CanRunMore() const {
size_t subproc_number =
subprocs_.running_.size() + subprocs_.finished_.size();
return (int)subproc_number < config_.parallelism
&& ((subprocs_.running_.empty() || config_.max_load_average <= 0.0f)
|| GetLoadAverage() < config_.max_load_average);
bool parallelism_limit_not_reached =
tokens_ || // ignore config_.parallelism
((int) (subprocs_.running_.size() +
subprocs_.finished_.size()) < config_.parallelism);
return parallelism_limit_not_reached
&& (subprocs_.running_.empty() ||
(max_load_average_ <= 0.0f ||
GetLoadAverage() < max_load_average_));
}

bool RealCommandRunner::AcquireToken() {
return (!tokens_ || tokens_->Acquire());
}

bool RealCommandRunner::StartCommand(Edge* edge) {
string command = edge->EvaluateCommand();
Subprocess* subproc = subprocs_.Add(command, edge->use_console());
if (!subproc)
return false;
if (tokens_)
tokens_->Reserve();
subproc_to_edge_.insert(make_pair(subproc, edge));

return true;
}

bool RealCommandRunner::WaitForCommand(Result* result) {
bool RealCommandRunner::WaitForCommand(Result* result, bool more_ready) {
Subprocess* subproc;
while ((subproc = subprocs_.NextFinished()) == NULL) {
bool interrupted = subprocs_.DoWork();
subprocs_.ResetTokenAvailable();
while (((subproc = subprocs_.NextFinished()) == NULL) &&
!subprocs_.IsTokenAvailable()) {
bool interrupted = subprocs_.DoWork(more_ready ? tokens_ : NULL);
if (interrupted)
return false;
}

// token became available
if (subproc == NULL) {
result->status = ExitTokenAvailable;
return true;
Copy link

@Ext3h Ext3h Mar 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can't be right... subprocs_.NextFinished() in that loop above has popped any number of completed processes.

But tokens_->Release() is only being called conditionally and only once at most.

So this is leaving tokens incorrectly marked as "in-use". You would have needed to release the token together with finished_.push(*i);.

Copy link

@Ext3h Ext3h Mar 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... no, that loop above is looping while it does not pop anything. And you are also only quitting if no process had finished, and you were certain to have gotten a token instead.

Okay, this is working then, just really hard to follow and the naming got seriously confusing.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might have wanted to put it in the else to the above if, and put a comment on the loop explaining the non-trivial exit condition.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also the methods used here are definitely overdue for renaming, their names don't reflect what they do.

}

// command completed
if (tokens_)
tokens_->Release();
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has to be done after result->status = subproc->Finish() - the process is still running, only the pipe was broken yet.

Unrelated - but this is also a spot for optimization, as Ninja will get stalled here if a long-running process detaches from terminal early.


result->status = subproc->Finish();
result->output = subproc->GetOutput();

Expand Down Expand Up @@ -628,45 +684,54 @@ bool Builder::Build(string* err) {
// command runner.
// Second, we attempt to wait for / reap the next finished command.
while (plan_.more_to_do()) {
// See if we can start any more commands.
if (failures_allowed && command_runner_->CanRunMore()) {
if (Edge* edge = plan_.FindWork()) {
if (edge->GetBindingBool("generator")) {
// See if we can start any more commands...
bool can_run_more =
failures_allowed &&
plan_.more_ready() &&
command_runner_->CanRunMore();
Copy link

@Ext3h Ext3h Mar 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This CanRunMore() is kind of wrong. If there is a token pool, it's the sole authority on parallelism. No matter whether it's doing this based on load measurement or token counting.

If you have an AcquireToken() now, then you should only be calling CanRunMore() as an implementation detail within AcquireToken in the fallback case when a token pool is absent.


// ... but we also need a token to do that.
if (can_run_more && command_runner_->AcquireToken()) {
Edge* edge = plan_.FindWork();
if (edge->GetBindingBool("generator")) {
scan_.build_log()->Close();
}

if (!StartEdge(edge, err)) {
if (!StartEdge(edge, err)) {
Cleanup();
status_->BuildFinished();
return false;
}

if (edge->is_phony()) {
if (!plan_.EdgeFinished(edge, Plan::kEdgeSucceeded, err)) {
Cleanup();
status_->BuildFinished();
return false;
}

if (edge->is_phony()) {
if (!plan_.EdgeFinished(edge, Plan::kEdgeSucceeded, err)) {
Cleanup();
status_->BuildFinished();
return false;
}
} else {
++pending_commands;
}

// We made some progress; go back to the main loop.
continue;
} else {
++pending_commands;
}

// We made some progress; go back to the main loop.
continue;
}

// See if we can reap any finished commands.
if (pending_commands) {
CommandRunner::Result result;
if (!command_runner_->WaitForCommand(&result) ||
if (!command_runner_->WaitForCommand(&result, can_run_more) ||
result.status == ExitInterrupted) {
Cleanup();
status_->BuildFinished();
*err = "interrupted by user";
return false;
}

// We might be able to start another command; start the main loop over.
if (result.status == ExitTokenAvailable)
continue;

--pending_commands;
if (!FinishCommand(&result, err)) {
Cleanup();
Expand Down
15 changes: 13 additions & 2 deletions src/build.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ struct Plan {
/// Returns true if there's more work to be done.
bool more_to_do() const { return wanted_edges_ > 0 && command_edges_ > 0; }

/// Returns true if there's more edges ready to start
bool more_ready() const { return !ready_.empty(); }

/// Dumps the current state of the plan.
void Dump() const;

Expand Down Expand Up @@ -136,6 +139,7 @@ struct Plan {
struct CommandRunner {
virtual ~CommandRunner() {}
virtual bool CanRunMore() const = 0;
virtual bool AcquireToken() = 0;
virtual bool StartCommand(Edge* edge) = 0;

/// The result of waiting for a command.
Expand All @@ -147,15 +151,19 @@ struct CommandRunner {
bool success() const { return status == ExitSuccess; }
};
/// Wait for a command to complete, or return false if interrupted.
virtual bool WaitForCommand(Result* result) = 0;
/// If more_ready is true then the optional TokenPool is monitored too
/// and we return when a token becomes available.
virtual bool WaitForCommand(Result* result, bool more_ready) = 0;

virtual std::vector<Edge*> GetActiveEdges() { return std::vector<Edge*>(); }
virtual void Abort() {}
};

/// Options (e.g. verbosity, parallelism) passed to a build.
struct BuildConfig {
BuildConfig() : verbosity(NORMAL), dry_run(false), parallelism(1),
BuildConfig() : verbosity(NORMAL), dry_run(false),
parallelism(1), parallelism_from_cmdline(false),
tokenpool_master(false), tokenpool_master_style(NULL),
failures_allowed(1), max_load_average(-0.0f) {}

enum Verbosity {
Expand All @@ -167,6 +175,9 @@ struct BuildConfig {
Verbosity verbosity;
bool dry_run;
int parallelism;
bool parallelism_from_cmdline;
bool tokenpool_master;
const char* tokenpool_master_style;
int failures_allowed;
/// The maximum load average we must not exceed. A negative value
/// means that we do not have any limit.
Expand Down
Loading