Skip to content

Commit

Permalink
Add ContainerRuntimeInitializationTimeout to abort startup when container runtime stays unhealthy for too long (#7241)
Browse files Browse the repository at this point in the history

* Add ContainerRuntimeInitializationTimeout to abort startup when container runtime stays unhealthy for too long

* Change DcpDependencyCheck exception type

* Update src/Aspire.Hosting.Testing/DistributedApplicationFactory.cs

---------

Co-authored-by: David Fowler <[email protected]>
  • Loading branch information
ReubenBond and davidfowl authored Jan 25, 2025
1 parent d2ca15d commit e7963de
Show file tree
Hide file tree
Showing 8 changed files with 69 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ private void OnBuilderCreatingCore(DistributedApplicationOptions applicationOpti
var cfg = hostBuilderOptions.Configuration ??= new();
var additionalConfig = new Dictionary<string, string?>
{
["DcpPublisher:ContainerRuntimeInitializationTimeout"] = "00:00:30",
["DcpPublisher:RandomizePorts"] = "true",
["DcpPublisher:DeleteResourcesOnShutdown"] = "true",
["DcpPublisher:ResourceNameSuffix"] = $"{Random.Shared.Next():x}",
Expand Down
22 changes: 15 additions & 7 deletions src/Aspire.Hosting/Dcp/DcpDependencyCheck.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ public DcpDependencyCheck(IOptions<DcpOptions> dcpOptions)
_dcpOptions = dcpOptions.Value;
}

public async Task<DcpInfo?> GetDcpInfoAsync(CancellationToken cancellationToken = default)
public async Task<DcpInfo?> GetDcpInfoAsync(bool force = false, CancellationToken cancellationToken = default)
{
await _lock.WaitAsync(cancellationToken).ConfigureAwait(false);
try
{
if (_checkDone)
if (_checkDone && !force)
{
return _dcpInfo;
}
Expand Down Expand Up @@ -172,7 +172,7 @@ private static void EnsureDcpVersion(DcpInfo dcpInfo)
}
}

internal static void CheckDcpInfoAndLogErrors(ILogger logger, DcpOptions options, DcpInfo dcpInfo)
internal static void CheckDcpInfoAndLogErrors(ILogger logger, DcpOptions options, DcpInfo dcpInfo, bool throwIfUnhealthy = false)
{
var containerRuntime = options.ContainerRuntime;
if (string.IsNullOrEmpty(containerRuntime))
Expand All @@ -186,14 +186,18 @@ internal static void CheckDcpInfoAndLogErrors(ILogger logger, DcpOptions options

if (!installed)
{
logger.LogWarning("Container runtime '{runtime}' could not be found. See https://aka.ms/dotnet/aspire/containers for more details on supported container runtimes.", containerRuntime);
logger.LogWarning("Container runtime '{Runtime}' could not be found. See https://aka.ms/dotnet/aspire/containers for more details on supported container runtimes.", containerRuntime);

logger.LogDebug("The error from the container runtime check was: {error}", error);
logger.LogDebug("The error from the container runtime check was: {Error}", error);
if (throwIfUnhealthy)
{
throw new DistributedApplicationException($"Container runtime '{containerRuntime}' could not be found. See https://aka.ms/dotnet/aspire/containers for more details on supported container runtimes.");
}
}
else if (!running)
{
var messageFormat = new StringBuilder();
messageFormat.Append("Container runtime '{runtime}' was found but appears to be unhealthy. ");
messageFormat.Append("Container runtime '{Runtime}' was found but appears to be unhealthy. ");

if (string.Equals(containerRuntime, "docker", StringComparison.OrdinalIgnoreCase))
{
Expand All @@ -211,7 +215,11 @@ internal static void CheckDcpInfoAndLogErrors(ILogger logger, DcpOptions options

logger.LogWarning(messageFormat.ToString(), containerRuntime);

logger.LogDebug("The error from the container runtime check was: {error}", error);
logger.LogDebug("The error from the container runtime check was: {Error}", error);
if (throwIfUnhealthy)
{
throw new DistributedApplicationException(messageFormat.Replace("{Runtime}", containerRuntime).ToString());
}
}
}
}
2 changes: 1 addition & 1 deletion src/Aspire.Hosting/Dcp/DcpExecutor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ public async Task RunApplicationAsync(CancellationToken cancellationToken = defa
{
AspireEventSource.Instance.DcpModelCreationStart();

_dcpInfo = await _dcpDependencyCheckService.GetDcpInfoAsync(cancellationToken).ConfigureAwait(false);
_dcpInfo = await _dcpDependencyCheckService.GetDcpInfoAsync(cancellationToken: cancellationToken).ConfigureAwait(false);

Debug.Assert(_dcpInfo is not null, "DCP info should not be null at this point");

Expand Down
43 changes: 37 additions & 6 deletions src/Aspire.Hosting/Dcp/DcpHost.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,15 @@ public DcpHost(

public async Task StartAsync(CancellationToken cancellationToken)
{
// Ensure DCP is installed and has all required dependencies
var dcpInfo = await _dependencyCheckService.GetDcpInfoAsync(cancellationToken).ConfigureAwait(false);

EnsureDcpContainerRuntime(dcpInfo);
await EnsureDcpContainerRuntimeAsync(cancellationToken).ConfigureAwait(false);
EnsureDcpHostRunning();
}

private void EnsureDcpContainerRuntime(DcpInfo? dcpInfo)
private async Task EnsureDcpContainerRuntimeAsync(CancellationToken cancellationToken)
{
// Ensure DCP is installed and has all required dependencies
var dcpInfo = await _dependencyCheckService.GetDcpInfoAsync(cancellationToken: cancellationToken).ConfigureAwait(false);

if (dcpInfo is null)
{
return;
Expand All @@ -80,7 +80,38 @@ private void EnsureDcpContainerRuntime(DcpInfo? dcpInfo)

try
{
DcpDependencyCheck.CheckDcpInfoAndLogErrors(_logger, _dcpOptions, dcpInfo);
bool requireContainerRuntimeInitialization = _dcpOptions.ContainerRuntimeInitializationTimeout > TimeSpan.Zero;
if (requireContainerRuntimeInitialization)
{
using var timeoutCancellation = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
timeoutCancellation.CancelAfter(_dcpOptions.ContainerRuntimeInitializationTimeout);

// The runtime counts as healthy only when container info is present and reports
// the runtime as both installed and running; missing info is treated as unhealthy.
static bool IsContainerRuntimeHealthy(DcpInfo dcpInfo)
    => dcpInfo.Containers is { Installed: true, Running: true };

try
{
while (dcpInfo is not null && !IsContainerRuntimeHealthy(dcpInfo))
{
await Task.Delay(TimeSpan.FromSeconds(2), timeoutCancellation.Token).ConfigureAwait(false);
dcpInfo = await _dependencyCheckService.GetDcpInfoAsync(force: true, cancellationToken: timeoutCancellation.Token).ConfigureAwait(false);
}
}
catch (OperationCanceledException) when (timeoutCancellation.IsCancellationRequested)
{
// Swallow the cancellation exception and let it bubble up as a more helpful error
// about the container runtime in CheckDcpInfoAndLogErrors.
}
}

if (dcpInfo is not null)
{
DcpDependencyCheck.CheckDcpInfoAndLogErrors(_logger, _dcpOptions, dcpInfo, throwIfUnhealthy: requireContainerRuntimeInitialization);
}
}
finally
{
Expand Down
12 changes: 12 additions & 0 deletions src/Aspire.Hosting/Dcp/DcpOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,17 @@ internal sealed class DcpOptions

public int KubernetesConfigReadRetryIntervalMilliseconds { get; set; } = 100;

/// <summary>
/// The maximum duration to wait for the container runtime to become healthy before aborting startup.
/// </summary>
/// <remarks>
/// A value of zero, which is the default value, indicates that the application will not wait for the container
/// runtime to become healthy. The wait is only enabled for values strictly greater than <see cref="TimeSpan.Zero"/>,
/// so negative values disable it as well.
/// If this property has a value greater than zero, the application will abort startup with a
/// <c>DistributedApplicationException</c> if the container runtime is not installed and running once the
/// timeout has elapsed.
/// The value is read from the <c>ContainerRuntimeInitializationTimeout</c> key of the DCP publisher configuration
/// section (for example <c>DcpPublisher:ContainerRuntimeInitializationTimeout</c> set to <c>00:00:30</c>).
/// </remarks>
public TimeSpan ContainerRuntimeInitializationTimeout { get; set; }

public TimeSpan ServiceStartupWatchTimeout { get; set; } = TimeSpan.FromSeconds(10);
}

Expand Down Expand Up @@ -170,6 +181,7 @@ public void Configure(DcpOptions options)

options.RandomizePorts = dcpPublisherConfiguration.GetValue(nameof(options.RandomizePorts), options.RandomizePorts);
options.ServiceStartupWatchTimeout = configuration.GetValue("DOTNET_ASPIRE_SERVICE_STARTUP_WATCH_TIMEOUT", options.ServiceStartupWatchTimeout);
options.ContainerRuntimeInitializationTimeout = dcpPublisherConfiguration.GetValue(nameof(options.ContainerRuntimeInitializationTimeout), options.ContainerRuntimeInitializationTimeout);
}

private static string? GetMetadataValue(IEnumerable<AssemblyMetadataAttribute>? assemblyMetadata, string key)
Expand Down
2 changes: 1 addition & 1 deletion src/Aspire.Hosting/Dcp/IDcpDependencyCheckService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ namespace Aspire.Hosting.Dcp;

internal interface IDcpDependencyCheckService
{
Task<DcpInfo?> GetDcpInfoAsync(CancellationToken cancellationToken = default);
Task<DcpInfo?> GetDcpInfoAsync(bool force = false, CancellationToken cancellationToken = default);
}

internal sealed class DcpInfo
Expand Down
2 changes: 1 addition & 1 deletion src/Aspire.Hosting/OtlpConfigurationExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public static void AddOtlpEnvironment(IResource resource, IConfiguration configu

// Set the service name and instance id to the resource name and UID. Values are injected by DCP.
var dcpDependencyCheckService = context.ExecutionContext.ServiceProvider.GetRequiredService<IDcpDependencyCheckService>();
var dcpInfo = await dcpDependencyCheckService.GetDcpInfoAsync(context.CancellationToken).ConfigureAwait(false);
var dcpInfo = await dcpDependencyCheckService.GetDcpInfoAsync(cancellationToken: context.CancellationToken).ConfigureAwait(false);
context.EnvironmentVariables["OTEL_RESOURCE_ATTRIBUTES"] = "service.instance.id={{- index .Annotations \"" + CustomResource.OtelServiceInstanceIdAnnotation + "\" -}}";
context.EnvironmentVariables["OTEL_SERVICE_NAME"] = "{{- index .Annotations \"" + CustomResource.OtelServiceNameAnnotation + "\" -}}";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
namespace Aspire.Hosting.Tests.Dcp;
internal sealed class TestDcpDependencyCheckService : IDcpDependencyCheckService
{
public Task<DcpInfo?> GetDcpInfoAsync(CancellationToken cancellationToken = default)
public Task<DcpInfo?> GetDcpInfoAsync(bool force = false, CancellationToken cancellationToken = default)
{
var dcpInfo = new DcpInfo
{
Expand Down

0 comments on commit e7963de

Please sign in to comment.