Skip to content

Commit

Permalink
Merged PR 726534: Limit the amount of open connections from the blob …
Browse files Browse the repository at this point in the history
…storage cache by sharing an HttpClient between BlobClients

If left unbounded, we have observed spikes of >65k open sockets (at which point we hit the OS limit of open files for the process - on Linux, where sockets count as files). Running builds where we limit this value all the way down to 100 didn't see any noticeable performance impact, so 30k shouldn't pose a problem. The configurable limit is per-client and per-server, but because we will reuse this HttpClient for all BlobClients and the 'server' (blob storage endpoint) is also always the same, we are effectively limiting the number of open connections in general.

Related work items: #2076905
  • Loading branch information
marcelolynch committed Jul 5, 2023
1 parent f927ccc commit d7aeee4
Showing 1 changed file with 32 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
using System.Collections.Concurrent;
using System.Diagnostics.ContractsLight;
using System.Linq;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using Azure;
using Azure.Core.Pipeline;
using Azure.Storage.Blobs;
using BuildXL.Cache.ContentStore.Interfaces.Results;
using BuildXL.Cache.ContentStore.Synchronization;
Expand All @@ -20,7 +22,7 @@
namespace BuildXL.Cache.ContentStore.Distributed.Blob;

public class ShardedBlobCacheTopology : IBlobCacheTopology
{
{
protected Tracer Tracer { get; } = new Tracer(nameof(ShardedBlobCacheTopology));

public record Configuration(
Expand All @@ -39,6 +41,15 @@ public record Configuration(
private readonly BlobCacheContainerName[] _containers;
private readonly IShardingScheme<int, BlobCacheStorageAccountName> _scheme;

/// <remarks>
/// We reuse a single HttpClient for the transport backing the blob clients. HttpClient is meant to be reused anyway
/// (https://learn.microsoft.com/en-us/dotnet/api/system.net.http.httpclient?view=net-7.0#instancing),
/// but crucially we need to configure the number of open connections: with the defaults,
/// the number of connections is unbounded, and we have observed builds that ended up with tens of thousands
/// of open sockets, which can (and did) hit the per-process limit of open files, crashing the engine.
/// </remarks>
private readonly HttpClient _httpClient;

private readonly record struct Location(BlobCacheStorageAccountName Account, BlobCacheContainerName Container);

/// <summary>
Expand All @@ -62,6 +73,19 @@ public ShardedBlobCacheTopology(Configuration configuration)

_scheme = _configuration.ShardingScheme.Create();
_containers = GenerateContainerNames(_configuration.Universe, _configuration.Namespace, _configuration.ShardingScheme);

_httpClient = new HttpClient(
new HttpClientHandler()
{
// If left unbounded, we have observed spikes of >65k open sockets (at which point we hit
// the OS limit of open files for the process - on Linux, where sockets count as files).
// Builds run with this value limited all the way down to 100 showed no
// noticeable performance impact, so 30k shouldn't pose a problem.
// The configurable limit is per-client and per-server, but because we reuse this HttpClient
// for all BlobClients and the 'server' (blob storage endpoint) is also always the same,
// we are effectively limiting the number of open connections in general.
MaxConnectionsPerServer = 30_000
});
}

internal static BlobCacheContainerName[] GenerateContainerNames(string universe, string @namespace, ShardingScheme scheme)
Expand Down Expand Up @@ -155,7 +179,13 @@ private Task<Result<BlobContainerClient>> CreateClientAsync(OperationContext con
async context =>
{
var credentials = await _configuration.SecretsProvider.RetrieveBlobCredentialsAsync(context, account, container);
var containerClient = credentials.CreateContainerClient(container.ContainerName);
BlobClientOptions blobClientOptions = new(BlobClientOptions.ServiceVersion.V2021_02_12)
{
Transport = new HttpClientTransport(_httpClient)
};
var containerClient = credentials.CreateContainerClient(container.ContainerName, blobClientOptions);
try
{
Expand Down

0 comments on commit d7aeee4

Please sign in to comment.