Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Snowflake specific changes #3

Open
wants to merge 8 commits into
base: release-7.1.2
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -117,28 +117,34 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
if (!file.getFileName().toString().endsWith(".class")) {
return FileVisitResult.CONTINUE;
}
// TODO(bazel-team): filter with coverage_instrumentation_filter?
// It's not clear whether there is any advantage in not instrumenting *Test classes,
// apart from lowering the covered percentage in the aggregate statistics.

// We first move the original .class file to our metadata directory, then instrument it
// and output the instrumented version in the regular classes output directory.
Path instrumentedCopy = file;
Path uninstrumentedCopy;
if (isNewCoverageImplementation) {
Path absoluteUninstrumentedCopy = Paths.get(file + ".uninstrumented");
uninstrumentedCopy =
instrumentedClassesDirectory.resolve(root.relativize(absoluteUninstrumentedCopy));
} else {
uninstrumentedCopy = instrumentedClassesDirectory.resolve(root.relativize(file));
}
Files.createDirectories(uninstrumentedCopy.getParent());
Files.move(file, uninstrumentedCopy);
try (InputStream input =
new BufferedInputStream(Files.newInputStream(uninstrumentedCopy));
OutputStream output =
new BufferedOutputStream(Files.newOutputStream(instrumentedCopy))) {
instr.instrument(input, output, file.toString());
try {
// We first move the original .class file to our metadata directory, then instrument
// it and output the instrumented version in the regular classes output directory.
Path instrumentedCopy = file;
Path uninstrumentedCopy;
if (isNewCoverageImplementation) {
Path absoluteUninstrumentedCopy = Paths.get(file + ".uninstrumented");
uninstrumentedCopy =
instrumentedClassesDirectory.resolve(
root.relativize(absoluteUninstrumentedCopy));
} else {
uninstrumentedCopy = instrumentedClassesDirectory.resolve(root.relativize(file));
}
Files.createDirectories(uninstrumentedCopy.getParent());
Files.move(file, uninstrumentedCopy);
try (InputStream input =
new BufferedInputStream(Files.newInputStream(uninstrumentedCopy));
OutputStream output =
new BufferedOutputStream(Files.newOutputStream(instrumentedCopy))) {
instr.instrument(input, output, file.toString());
} catch (Exception e) {
Files.delete(instrumentedCopy);
Files.copy(uninstrumentedCopy, instrumentedCopy);
throw e; // Bubble up to the outer broader safety catch block for logging.
}
} catch (Exception e) {
System.err.printf("WARNING: %s was not instrumented: %s%n\n", file, e.getMessage());
}
return FileVisitResult.CONTINUE;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@
import com.google.devtools.build.lib.cmdline.RepositoryName;
import com.google.devtools.build.lib.events.Event;
import com.google.devtools.build.lib.events.EventHandler;
import com.google.devtools.build.lib.events.ExtendedEventHandler;
import com.google.devtools.build.lib.events.ExtendedEventHandler.FetchProgress;
import com.google.devtools.build.lib.events.NullEventHandler;
import com.google.devtools.build.lib.packages.StarlarkInfo;
import com.google.devtools.build.lib.packages.StructImpl;
import com.google.devtools.build.lib.packages.StructProvider;
Expand Down Expand Up @@ -615,8 +617,8 @@ private StructImpl completeDownload(PendingDownload pendingDownload)
defaultValue = "False",
named = true,
doc =
"If set, indicate the error in the return value"
+ " instead of raising an error for failed downloads"),
"If set, indicate the error in the return value instead of raising an error for"
+ " failed downloads. This silences errors and warnings."),
@Param(
name = "canonical_id",
defaultValue = "''",
Expand Down Expand Up @@ -726,7 +728,7 @@ public Object download(
canonicalId,
Optional.<String>empty(),
outputPath.getPath(),
env.getListener(),
allowFail ? NullEventHandler.INSTANCE : env.getListener(),
envVariables,
getIdentifyingStringForLogging());
download =
Expand Down Expand Up @@ -815,8 +817,8 @@ public Object download(
defaultValue = "False",
named = true,
doc =
"If set, indicate the error in the return value"
+ " instead of raising an error for failed downloads"),
"If set, indicate the error in the return value instead of raising an error for"
+ " failed downloads. This silences errors and warnings."),
@Param(
name = "canonical_id",
defaultValue = "''",
Expand Down Expand Up @@ -927,7 +929,7 @@ public StructImpl downloadAndExtract(
canonicalId,
Optional.of(type),
downloadDirectory,
env.getListener(),
allowFail ? NullEventHandler.INSTANCE : env.getListener(),
envVariables,
getIdentifyingStringForLogging());
downloadedPath = downloadManager.finalizeDownload(pendingDownload);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,21 @@ public boolean usingLocalTestJobs() {
+ " code 39.")
public int remoteRetryOnCacheEviction;

@Option(
name = "experimental_use_remote_cache_for_cache_unaware_spawns",
defaultValue = "true",
documentationCategory = OptionDocumentationCategory.REMOTE,
effectTags = {OptionEffectTag.UNKNOWN},
help =
"If set to true (the default), allow spawns other than \"remote\" to use remote caching. "
+ "Setting this to false is useful when troubleshooting non-determinism cases in a "
+ "\"mixed\" configuration where certain spawns are forced to run remotely (for "
+ "example, to hide their non-determinism) and others are allowed to run via dynamic "
+ "execution. In such cases, we must allow the remote spawns to leverage the remote "
+ "cache, but not those that are configured to run locally. This flag is similar in "
+ "purpose to --remote_accept_cached.")
public boolean useRemoteCacheForCacheUnawareSpawns;

/** An enum for specifying different formats of test output. */
public enum TestOutputFormat {
SUMMARY, // Provide summary output only.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:resource_manager",
"//src/main/java/com/google/devtools/build/lib/concurrent",
"//src/main/java/com/google/devtools/build/lib/exec:bin_tools",
"//src/main/java/com/google/devtools/build/lib/exec:execution_options",
"//src/main/java/com/google/devtools/build/lib/exec:runfiles_tree_updater",
"//src/main/java/com/google/devtools/build/lib/exec:spawn_runner",
"//src/main/java/com/google/devtools/build/lib/profiler",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import com.google.devtools.build.lib.actions.cache.VirtualActionInput;
import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
import com.google.devtools.build.lib.exec.BinTools;
import com.google.devtools.build.lib.exec.ExecutionOptions;
import com.google.devtools.build.lib.exec.RunfilesTreeUpdater;
import com.google.devtools.build.lib.exec.SpawnExecutingEvent;
import com.google.devtools.build.lib.exec.SpawnRunner;
Expand Down Expand Up @@ -100,9 +101,11 @@ public class LocalSpawnRunner implements SpawnRunner {
private final BinTools binTools;

private final RunfilesTreeUpdater runfilesTreeUpdater;
private final boolean handlesCaching;

public LocalSpawnRunner(
Path execRoot,
ExecutionOptions executionOptions,
LocalExecutionOptions localExecutionOptions,
ResourceManager resourceManager,
LocalEnvProvider localEnvProvider,
Expand All @@ -117,6 +120,7 @@ public LocalSpawnRunner(
this.localEnvProvider = localEnvProvider;
this.binTools = binTools;
this.runfilesTreeUpdater = runfilesTreeUpdater;
this.handlesCaching = !executionOptions.useRemoteCacheForCacheUnawareSpawns;
}

@Override
Expand Down Expand Up @@ -182,7 +186,7 @@ public boolean canExec(Spawn spawn) {

@Override
public boolean handlesCaching() {
return false;
return handlesCaching;
}

protected Path createActionTemp(Path execRoot) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,11 @@
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.longrunning.Operation;
import com.google.protobuf.Any;
import com.google.protobuf.Timestamp;
import com.google.protobuf.util.Durations;
import com.google.protobuf.util.Timestamps;
import com.google.rpc.RetryInfo;
import io.grpc.Status.Code;
import java.io.IOException;
import java.time.Duration;
Expand Down Expand Up @@ -179,6 +181,29 @@ public void reportExecutingIfNot() {
}
}

/**
* Guess if an action failed due to a transient remote error or not.
*
* <p>As described in <a href="https://github.com/bazelbuild/bazel/issues/18319">#18319</a>, the
* Remote Build protocol does not yet provide a way to distinguish between regular action crashes
* and unexpected worker crashes. This provides a heuristic to retry on likely worker crashes.
* The specific error code we may get depends on the remote executor implementation so we need to
* be quite liberal here: Go funnels all non-regular exit codes into {@code -1} and the shell
* returns {@code 128+SIGNO}.
*/
private boolean isTransientError(RemoteAction action, RemoteActionResult result) {
if (!remoteOptions.remoteExitSignalsAreTransientErrors) {
return false;
}

boolean isTestAction = action.getSpawn().getMnemonic().equals("TestRunner");
if (isTestAction) {
return false;
}

return result.getExitCode() < 0 || result.getExitCode() > 127;
}

@Override
public SpawnResult exec(Spawn spawn, SpawnExecutionContext context)
throws ExecException, InterruptedException, IOException, ForbiddenActionInputException {
Expand Down Expand Up @@ -306,6 +331,21 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionContext context)
maybeDownloadServerLogs(action, result.getResponse());
}

if (isTransientError(action, result)) {
// This contraption is required to convince the ExecuteRetrier that the failure can
// be retried.
com.google.rpc.Status synthesizedStatus = com.google.rpc.Status.newBuilder()
.setCode(com.google.rpc.Code.FAILED_PRECONDITION.getNumber())
.setMessage(
"Remote action seems to have terminated due to a signal (exit code "
+ result.getExitCode()
+ "); assuming worker crash to retry")
.addDetails(Any.pack(RetryInfo.newBuilder().build()))
.build();
Exception cause = new ExecutionStatusException(synthesizedStatus, null);
throw new IOException(cause);
}

try {
return downloadAndFinalizeSpawnResult(
action,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,22 @@ public RemoteBuildEventUploadModeConverter() {
+ "If set to 0, retries are disabled.")
public int remoteMaxRetryAttempts;

// TODO(https://github.com/bazelbuild/bazel/issues/18319): Remove once the Remote Build
// protocol can distinguish between abrupt worker termination and action termination.
@Option(
name = "snowflake_remote_exit_signals_are_transient_errors",
defaultValue = "false",
documentationCategory = OptionDocumentationCategory.REMOTE,
effectTags = {OptionEffectTag.UNKNOWN},
help =
"Consider abrupt terminations of remote non-test actions as transient failures. "
+ "This can be used to retry actions that fail due to their remote worker "
+ "crashing instead of failing the build. Note that this cannot differentiate "
+ "between those failures and legitimate action crashes so, if you enable this, "
+ "you should also set --remote_retries to non-zero or enable local fallback "
+ "to avoid getting into an infinite retry loop.")
public boolean remoteExitSignalsAreTransientErrors;

@Option(
name = "remote_retry_max_delay",
defaultValue = "5s",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,7 @@ private LazyData doCostlyEstimation() {
case LINUX:
resourceSet =
ResourceSet.createWithRamCpu(
/* memoryMb= */ Math.max(50, -100 + 0.1 * inputsCount), /* cpu= */ 1);
/* memoryMb= */ 100 + 2.4 * (inputsBytes / 1024.0 / 1024.0), /* cpuUsage= */ 1);
break;
default:
resourceSet =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ DetailedExitCode parseOptions(List<String> args, ExtendedEventHandler eventHandl
// to improve the user experience, but not required for safety or correctness.
optionsPolicyEnforcer.enforce(optionsParser, commandAnnotation.name());
// Print warnings for odd options usage
for (String warning : optionsParser.getWarnings()) {
for (String warning : getFilteredWarnings(optionsParser)) {
eventHandler.handle(Event.warn(warning));
}
CommonCommandOptions commonOptions = optionsParser.getOptions(CommonCommandOptions.class);
Expand Down Expand Up @@ -704,4 +704,20 @@ private static void partitionCommandLineArgs(
}
}
}

public static ImmutableList<String> getFilteredWarnings(OptionsParser parser) {
CommonCommandOptions commonOptions = parser.getOptions(CommonCommandOptions.class);
Preconditions.checkNotNull(commonOptions,
"getWarnings can only be called after options parsing");

ImmutableList.Builder<String> warnings = ImmutableList.<String>builder();
for (String warning : parser.getWarnings()) {
if (!commonOptions.reportDuplicateOptionsAndConfigs && warning.contains(
"was expanded to from both")) {
continue;
}
warnings.add(warning);
}
return warnings.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
import com.google.devtools.build.lib.runtime.proto.CommandLineOuterClass.CommandLineSection;
import com.google.devtools.build.lib.runtime.proto.CommandLineOuterClass.Option;
import com.google.devtools.build.lib.runtime.proto.CommandLineOuterClass.OptionList;
import com.google.devtools.build.lib.util.OptionsUtils;
import com.google.devtools.build.lib.util.OptionsUtils.OptionSensitivity;
import com.google.devtools.build.lib.util.Pair;
import com.google.devtools.common.options.OptionDefinition;
import com.google.devtools.common.options.OptionEffectTag;
Expand Down Expand Up @@ -138,11 +140,13 @@ List<Option> getOptionListFromParsedOptionDescriptions(

private Option createOption(
OptionDefinition optionDefinition, String combinedForm, @Nullable String value) {
OptionSensitivity sensitivity = OptionsUtils.getOptionSensitivity(optionDefinition.getOptionName());

Option.Builder option = Option.newBuilder();
option.setCombinedForm(combinedForm);
option.setCombinedForm(OptionsUtils.maybeScrubCombinedForm(sensitivity, combinedForm));
option.setOptionName(optionDefinition.getOptionName());
if (value != null) {
option.setOptionValue(value);
option.setOptionValue(OptionsUtils.maybeScrubAssignment(sensitivity, value));
}
option.addAllEffectTags(getProtoEffectTags(optionDefinition.getOptionEffectTags()));
option.addAllMetadataTags(getProtoMetadataTags(optionDefinition.getOptionMetadataTags()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,14 @@ public String getTypeDescription() {
+ " them.")
public boolean heuristicallyDropNodes;

@Option(
name = "snowflake_report_duplicate_options_and_configs",
defaultValue = "true",
documentationCategory = OptionDocumentationCategory.LOGGING,
effectTags = {OptionEffectTag.TERMINAL_OUTPUT},
help = "If enabled, reports flags and configs that were expanded multiple times as warnings.")
public boolean reportDuplicateOptionsAndConfigs;

/** The option converter to check that the user can only specify legal profiler tasks. */
public static class ProfilerTaskConverter extends EnumConverter<ProfilerTask> {
public ProfilerTaskConverter() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,11 @@ static void expandConfigOptions(

// At this point, we've expanded everything, identify duplicates, if any, to warn about
// re-application.
List<String> configs = optionsParser.getOptions(CommonCommandOptions.class).configs;
CommonCommandOptions commonOptions = optionsParser.getOptions(CommonCommandOptions.class);
if (!commonOptions.reportDuplicateOptionsAndConfigs) {
return;
}
List<String> configs = commonOptions.configs;
Set<String> configSet = new HashSet<>();
LinkedHashSet<String> duplicateConfigs = new LinkedHashSet<>();
for (String configValue : configs) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import com.google.devtools.build.lib.buildeventstream.BuildEventStreamProtos.BuildEventId;
import com.google.devtools.build.lib.buildeventstream.BuildEventWithOrderConstraint;
import com.google.devtools.build.lib.buildeventstream.GenericBuildEvent;
import com.google.devtools.build.lib.util.OptionsUtils;
import java.util.Collection;
import java.util.List;

Expand All @@ -31,7 +32,7 @@ public class OriginalUnstructuredCommandLineEvent implements BuildEventWithOrder
private final ImmutableList<String> args;

OriginalUnstructuredCommandLineEvent(List<String> args) {
this.args = ImmutableList.copyOf(args);
this.args = OptionsUtils.scrubArgs(ImmutableList.copyOf(args));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ abstract class AbstractSandboxSpawnRunner implements SpawnRunner {
private final Path execRoot;
private final ResourceManager resourceManager;
private final Reporter reporter;
private final boolean handlesCaching;

public AbstractSandboxSpawnRunner(CommandEnvironment cmdEnv) {
this.sandboxOptions = cmdEnv.getOptions().getOptions(SandboxOptions.class);
Expand All @@ -88,6 +89,7 @@ public AbstractSandboxSpawnRunner(CommandEnvironment cmdEnv) {
this.execRoot = cmdEnv.getExecRoot();
this.resourceManager = cmdEnv.getLocalResourceManager();
this.reporter = cmdEnv.getReporter();
this.handlesCaching = !cmdEnv.getOptions().getOptions(ExecutionOptions.class).useRemoteCacheForCacheUnawareSpawns;
}

@Override
Expand Down Expand Up @@ -132,7 +134,7 @@ public boolean canExec(Spawn spawn) {

@Override
public boolean handlesCaching() {
return false;
return this.handlesCaching;
}

protected abstract SandboxedSpawn prepareSpawn(Spawn spawn, SpawnExecutionContext context)
Expand Down
Loading