Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

aws: Integrate aws cluster manager capability #38271

Merged
merged 32 commits into from
Feb 7, 2025
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -90,5 +90,3 @@ The following statistics are output under the ``aws.metadata_credentials_provide
<provider_cluster>.credential_refreshes_failed, Counter, Total credential refreshes failed by this cluster. For example', this would be incremented if a WebIdentity token was expired
<provider_cluster>.credential_refreshes_succeeded, Counter, Total successful credential refreshes for this cluster. Successful refresh would indicate credentials are available for signing
<provider_cluster>.metadata_refresh_state, Gauge, 0 means the cluster is in initial refresh state', ie no successful credential refreshes have been performed. In 0 state the cluster will attempt credential refresh up to a maximum of once every 30 seconds. 1 means the cluster is in normal credential expiration based refresh state
<provider_cluster>.clusters_removed_by_cds, Counter, Number of metadata clusters removed during CDS refresh
<provider_cluster>.clusters_readded_after_cds, Counter, Number of metadata clusters replaced when CDS deletion occurs
1 change: 1 addition & 0 deletions source/extensions/common/aws/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ envoy_cc_library(
srcs = ["credentials_provider_impl.cc"],
hdrs = ["credentials_provider_impl.h"],
deps = [
":aws_cluster_manager_lib",
":credentials_provider_interface",
":metadata_fetcher_lib",
":utility_lib",
Expand Down
12 changes: 8 additions & 4 deletions source/extensions/common/aws/aws_cluster_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,15 @@ AwsClusterManager::AwsClusterManager(Server::Configuration::ServerFactoryContext
});
context_.initManager().add(*init_target_);
}
// We're pinned, so ensure that we remove our cluster update callbacks before cluster manager
// terminates

// We're pinned, so ensure that we remove our RAII callback handles before cluster manager and
// server manager terminate

shutdown_handle_ = context.lifecycleNotifier().registerCallback(
Server::ServerLifecycleNotifier::Stage::ShutdownExit,
[&](Event::PostCb) { cm_handle_.reset(); });
Server::ServerLifecycleNotifier::Stage::ShutdownExit, [this]() {
cm_handle_.reset();
shutdown_handle_.reset();
});
};

absl::StatusOr<AwsManagedClusterUpdateCallbacksHandlePtr>
Expand Down
409 changes: 207 additions & 202 deletions source/extensions/common/aws/credentials_provider_impl.cc

Large diffs are not rendered by default.

151 changes: 57 additions & 94 deletions source/extensions/common/aws/credentials_provider_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "source/common/init/target_impl.h"
#include "source/common/protobuf/message_validator_impl.h"
#include "source/common/protobuf/utility.h"
#include "source/extensions/common/aws/aws_cluster_manager.h"
#include "source/extensions/common/aws/credentials_provider.h"
#include "source/extensions/common/aws/metadata_fetcher.h"

Expand Down Expand Up @@ -110,84 +111,64 @@ class CredentialsFileCredentialsProvider : public CachedCredentialsProviderBase
void extractCredentials(absl::string_view credentials_string, absl::string_view profile);
};

class LoadClusterEntryHandle {
public:
virtual ~LoadClusterEntryHandle() = default;
};

#define ALL_METADATACREDENTIALSPROVIDER_STATS(COUNTER, GAUGE) \
COUNTER(credential_refreshes_performed) \
COUNTER(credential_refreshes_failed) \
COUNTER(credential_refreshes_succeeded) \
COUNTER(clusters_removed_by_cds) \
COUNTER(clusters_readded_after_cds) \
GAUGE(metadata_refresh_state, Accumulate)

struct MetadataCredentialsProviderStats {
ALL_METADATACREDENTIALSPROVIDER_STATS(GENERATE_COUNTER_STRUCT, GENERATE_GAUGE_STRUCT)
};

using LoadClusterEntryHandlePtr = std::unique_ptr<LoadClusterEntryHandle>;

class MetadataCredentialsProviderBase : public CachedCredentialsProviderBase {
class MetadataCredentialsProviderBase : public CachedCredentialsProviderBase,
public AwsManagedClusterUpdateCallbacks {
public:
friend class MetadataCredentialsProviderBaseFriend;
using CurlMetadataFetcher = std::function<absl::optional<std::string>(Http::RequestMessage&)>;
using OnAsyncFetchCb = std::function<void(const std::string&&)>;

MetadataCredentialsProviderBase(
Api::Api& api, ServerFactoryContextOptRef context,
const CurlMetadataFetcher& fetch_metadata_using_curl,
CreateMetadataFetcherCb create_metadata_fetcher_cb, absl::string_view cluster_name,
const envoy::config::cluster::v3::Cluster::DiscoveryType cluster_type, absl::string_view uri,
MetadataFetcher::MetadataReceiver::RefreshState refresh_state,
std::chrono::seconds initialization_timer);
MetadataCredentialsProviderBase(Api::Api& api, ServerFactoryContextOptRef context,
AwsClusterManagerOptRef aws_cluster_manager,
absl::string_view cluster_name,
const CurlMetadataFetcher& fetch_metadata_using_curl,
CreateMetadataFetcherCb create_metadata_fetcher_cb,
MetadataFetcher::MetadataReceiver::RefreshState refresh_state,
std::chrono::seconds initialization_timer);

~MetadataCredentialsProviderBase() override {
// Cancel our callback handle, to handle the case that we are exiting behind AWS cluster
// manager
if (callback_handle_) {
callback_handle_->cancel();
}
nbaws marked this conversation as resolved.
Show resolved Hide resolved
};

Credentials getCredentials() override;

// Get the Metadata credentials cache duration.
static std::chrono::seconds getCacheDuration();

private:
void createCluster(bool new_timer);
// Store the RAII cluster callback handle following registration call with AWS cluster manager
void setClusterReadyCallbackHandle(AwsManagedClusterUpdateCallbacksHandlePtr handle) {
callback_handle_ = std::move(handle);
}

protected:
struct LoadClusterEntryHandleImpl
: public LoadClusterEntryHandle,
RaiiMapOfListElement<std::string, LoadClusterEntryHandleImpl*> {
LoadClusterEntryHandleImpl(
absl::flat_hash_map<std::string, std::list<LoadClusterEntryHandleImpl*>>& parent,
absl::string_view host, Envoy::Event::TimerPtr& timer)
: RaiiMapOfListElement<std::string, LoadClusterEntryHandleImpl*>(parent, host, this),
timer_(timer) {}

Envoy::Event::TimerPtr& timer_;
};

struct ThreadLocalCredentialsCache : public ThreadLocal::ThreadLocalObject,
public Upstream::ClusterUpdateCallbacks {
ThreadLocalCredentialsCache(MetadataCredentialsProviderBase& parent)
: handle_(parent.context_->clusterManager().addThreadLocalClusterUpdateCallbacks(*this)),
parent_(parent), credentials_(std::make_shared<Credentials>()){};
struct ThreadLocalCredentialsCache : public ThreadLocal::ThreadLocalObject {
ThreadLocalCredentialsCache() : credentials_(std::make_shared<Credentials>()){};

~ThreadLocalCredentialsCache() override;

Upstream::ClusterUpdateCallbacksHandlePtr handle_;
// Parent credentials provider object
MetadataCredentialsProviderBase& parent_;
// The credentials object.
CredentialsConstSharedPtr credentials_;
absl::flat_hash_map<std::string, std::list<LoadClusterEntryHandleImpl*>> pending_clusters_;
// Lock guard.
Thread::MutexBasicLockable lock_;

private:
void onClusterAddOrUpdate(absl::string_view cluster_name,
Upstream::ThreadLocalClusterCommand&) override;
void onClusterRemoval(const std::string&) override;
};

const std::string& clusterName() const { return cluster_name_; }

// Callback from AWS cluster manager, triggered when our cluster comes online
void onClusterAddOrUpdate() override;

// Handle fetch done.
void handleFetchDone();

Expand All @@ -203,10 +184,6 @@ class MetadataCredentialsProviderBase : public CachedCredentialsProviderBase {
CreateMetadataFetcherCb create_metadata_fetcher_cb_;
// The cluster name to use for internal static cluster pointing towards the credentials provider.
std::string cluster_name_;
// The cluster type to use for internal static cluster pointing towards the credentials provider.
const envoy::config::cluster::v3::Cluster::DiscoveryType cluster_type_;
// The uri of internal static cluster credentials provider.
const std::string uri_;
// The cache duration of the fetched credentials.
std::chrono::seconds cache_duration_;
// Metadata receiver state, describing where we are along the initial credential refresh process
Expand All @@ -228,22 +205,18 @@ class MetadataCredentialsProviderBase : public CachedCredentialsProviderBase {
SystemTime last_updated_;
// Cache credentials when using libcurl.
Credentials cached_credentials_;
// The init target.
std::unique_ptr<Init::TargetImpl> init_target_;
// Used in logs.
const std::string debug_name_;
// The expiration time received in any returned token
absl::optional<SystemTime> expiration_time_;
// Tls slot
ThreadLocal::TypedSlotPtr<ThreadLocalCredentialsCache> tls_slot_ = nullptr;
// Storage for our per cluster credential timers
LoadClusterEntryHandlePtr cluster_load_handle_;
// Stats scope
Stats::ScopeSharedPtr scope_ = nullptr;
// Pointer to our stats structure
std::shared_ptr<MetadataCredentialsProviderStats> stats_;
// Atomic flag for cluster recreate
std::atomic<bool> is_creating_ = false;
// AWS Cluster Manager for creating clusters and retrieving URIs when async fetch is needed
AwsClusterManagerOptRef aws_cluster_manager_;
// RAII handle for callbacks from AWS cluster manager
AwsManagedClusterUpdateCallbacksHandlePtr callback_handle_;
};

/**
Expand All @@ -256,6 +229,7 @@ class InstanceProfileCredentialsProvider : public MetadataCredentialsProviderBas
public MetadataFetcher::MetadataReceiver {
public:
InstanceProfileCredentialsProvider(Api::Api& api, ServerFactoryContextOptRef context,
AwsClusterManagerOptRef aws_cluster_manager,
const CurlMetadataFetcher& fetch_metadata_using_curl,
CreateMetadataFetcherCb create_metadata_fetcher_cb,
MetadataFetcher::MetadataReceiver::RefreshState refresh_state,
Expand Down Expand Up @@ -295,6 +269,7 @@ class ContainerCredentialsProvider : public MetadataCredentialsProviderBase,
public MetadataFetcher::MetadataReceiver {
public:
ContainerCredentialsProvider(Api::Api& api, ServerFactoryContextOptRef context,
AwsClusterManagerOptRef aws_cluster_manager,
const CurlMetadataFetcher& fetch_metadata_using_curl,
CreateMetadataFetcherCb create_metadata_fetcher_cb,
absl::string_view credential_uri,
Expand Down Expand Up @@ -327,12 +302,12 @@ class WebIdentityCredentialsProvider : public MetadataCredentialsProviderBase,
// not used, and vice versa.
WebIdentityCredentialsProvider(
Server::Configuration::ServerFactoryContext& context,
CreateMetadataFetcherCb create_metadata_fetcher_cb, absl::string_view sts_endpoint,
AwsClusterManagerOptRef aws_cluster_manager, absl::string_view cluster_name,
CreateMetadataFetcherCb create_metadata_fetcher_cb,
MetadataFetcher::MetadataReceiver::RefreshState refresh_state,
std::chrono::seconds initialization_timer,
const envoy::extensions::common::aws::v3::AssumeRoleWithWebIdentityCredentialProvider&
web_identity_config,
absl::string_view cluster_name);
web_identity_config);

// Following functions are for MetadataFetcher::MetadataReceiver interface
void onMetadataSuccess(const std::string&& body) override;
Expand Down Expand Up @@ -380,15 +355,13 @@ class CredentialsProviderChainFactories {

virtual CredentialsProviderSharedPtr createWebIdentityCredentialsProvider(
Server::Configuration::ServerFactoryContext& context,
CreateMetadataFetcherCb create_metadata_fetcher_cb, absl::string_view sts_endpoint,
MetadataFetcher::MetadataReceiver::RefreshState refresh_state,
std::chrono::seconds initialization_timer,
AwsClusterManagerOptRef aws_cluster_manager, absl::string_view region,
const envoy::extensions::common::aws::v3::AssumeRoleWithWebIdentityCredentialProvider&
web_identity_config,
absl::string_view cluster_name) const PURE;
web_identity_config) const PURE;

virtual CredentialsProviderSharedPtr createContainerCredentialsProvider(
Api::Api& api, ServerFactoryContextOptRef context, Singleton::Manager& singleton_manager,
Api::Api& api, ServerFactoryContextOptRef context,
AwsClusterManagerOptRef aws_cluster_manager,
const MetadataCredentialsProviderBase::CurlMetadataFetcher& fetch_metadata_using_curl,
CreateMetadataFetcherCb create_metadata_fetcher_cb, absl::string_view cluster_name,
absl::string_view credential_uri,
Expand All @@ -397,7 +370,8 @@ class CredentialsProviderChainFactories {
absl::string_view authorization_token = {}) const PURE;

virtual CredentialsProviderSharedPtr createInstanceProfileCredentialsProvider(
Api::Api& api, ServerFactoryContextOptRef context, Singleton::Manager& singleton_manager,
Api::Api& api, ServerFactoryContextOptRef context,
AwsClusterManagerOptRef aws_cluster_manager,
const MetadataCredentialsProviderBase::CurlMetadataFetcher& fetch_metadata_using_curl,
CreateMetadataFetcherCb create_metadata_fetcher_cb,
MetadataFetcher::MetadataReceiver::RefreshState refresh_state,
Expand All @@ -415,12 +389,9 @@ class CustomCredentialsProviderChainFactories {

virtual CredentialsProviderSharedPtr createWebIdentityCredentialsProvider(
Server::Configuration::ServerFactoryContext& context,
CreateMetadataFetcherCb create_metadata_fetcher_cb, absl::string_view sts_endpoint,
MetadataFetcher::MetadataReceiver::RefreshState refresh_state,
std::chrono::seconds initialization_timer,
AwsClusterManagerOptRef aws_cluster_manager, absl::string_view region,
const envoy::extensions::common::aws::v3::AssumeRoleWithWebIdentityCredentialProvider&
web_identity_config,
absl::string_view cluster_name) const PURE;
web_identity_config) const PURE;
};

// TODO(nbaws) Add additional providers to the custom chain.
Expand Down Expand Up @@ -449,16 +420,11 @@ class CustomCredentialsProviderChain : public CredentialsProviderChain,

CredentialsProviderSharedPtr createWebIdentityCredentialsProvider(
Server::Configuration::ServerFactoryContext& context,
CreateMetadataFetcherCb create_metadata_fetcher_cb, absl::string_view sts_endpoint,
MetadataFetcher::MetadataReceiver::RefreshState refresh_state,
std::chrono::seconds initialization_timer,
AwsClusterManagerOptRef aws_cluster_manager, absl::string_view region,
const envoy::extensions::common::aws::v3::AssumeRoleWithWebIdentityCredentialProvider&
web_identity_config,
absl::string_view cluster_name) const override {
return std::make_shared<WebIdentityCredentialsProvider>(
context, create_metadata_fetcher_cb, sts_endpoint, refresh_state, initialization_timer,
web_identity_config, cluster_name);
};
web_identity_config) const override;

AwsClusterManagerPtr aws_cluster_manager_;
};

/**
Expand Down Expand Up @@ -518,7 +484,8 @@ class DefaultCredentialsProviderChain : public CredentialsProviderChain,
};

CredentialsProviderSharedPtr createContainerCredentialsProvider(
Api::Api& api, ServerFactoryContextOptRef context, Singleton::Manager& singleton_manager,
Api::Api& api, ServerFactoryContextOptRef context,
AwsClusterManagerOptRef aws_cluster_manager,
const MetadataCredentialsProviderBase::CurlMetadataFetcher& fetch_metadata_using_curl,
CreateMetadataFetcherCb create_metadata_fetcher_cb, absl::string_view cluster_name,
absl::string_view credential_uri,
Expand All @@ -527,24 +494,20 @@ class DefaultCredentialsProviderChain : public CredentialsProviderChain,
absl::string_view authorization_token) const override;

CredentialsProviderSharedPtr createInstanceProfileCredentialsProvider(
Api::Api& api, ServerFactoryContextOptRef context, Singleton::Manager& singleton_manager,
Api::Api& api, ServerFactoryContextOptRef context,
AwsClusterManagerOptRef aws_cluster_manager,
const MetadataCredentialsProviderBase::CurlMetadataFetcher& fetch_metadata_using_curl,
CreateMetadataFetcherCb create_metadata_fetcher_cb,
MetadataFetcher::MetadataReceiver::RefreshState refresh_state,
std::chrono::seconds initialization_timer, absl::string_view cluster_name) const override;

CredentialsProviderSharedPtr createWebIdentityCredentialsProvider(
Server::Configuration::ServerFactoryContext& context,
CreateMetadataFetcherCb create_metadata_fetcher_cb, absl::string_view sts_endpoint,
MetadataFetcher::MetadataReceiver::RefreshState refresh_state,
std::chrono::seconds initialization_timer,
AwsClusterManagerOptRef aws_cluster_manager, absl::string_view region,
const envoy::extensions::common::aws::v3::AssumeRoleWithWebIdentityCredentialProvider&
web_identity_config,
absl::string_view cluster_name) const override {
return std::make_shared<WebIdentityCredentialsProvider>(
context, create_metadata_fetcher_cb, sts_endpoint, refresh_state, initialization_timer,
web_identity_config, cluster_name);
}
web_identity_config) const override;

AwsClusterManagerPtr aws_cluster_manager_;
};

using InstanceProfileCredentialsProviderPtr = std::shared_ptr<InstanceProfileCredentialsProvider>;
Expand Down
Loading