Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add AWS STS authentication support #1884

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
3 changes: 2 additions & 1 deletion cpp/arcticdb/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ cmake_policy(PUSH)
if (EXISTS "/usr/local/lib64/aws-c-cal/cmake/modules/FindLibCrypto.cmake") # Workaround old AWS SDK bug
cmake_policy(SET CMP0045 OLD)
endif()
find_package(AWSSDK REQUIRED COMPONENTS s3)
find_package(AWSSDK REQUIRED COMPONENTS s3 identity-management)
cmake_policy(POP)

find_package(Boost REQUIRED)
Expand Down Expand Up @@ -298,6 +298,7 @@ set(arcticdb_srcs
storage/s3/nfs_backed_storage.hpp
storage/s3/s3_client_wrapper.hpp
storage/s3/s3_storage_tool.hpp
storage/s3/s3_settings.hpp
storage/storage_factory.hpp
storage/storage_options.hpp
storage/storage.hpp
Expand Down
12 changes: 12 additions & 0 deletions cpp/arcticdb/storage/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,12 @@
#include <arcticdb/entity/protobufs.hpp>
#include <arcticdb/entity/variant_key.hpp>
#include <arcticdb/entity/performance_tracing.hpp>
#include <arcticdb/storage/s3/s3_settings.hpp>
#include <sstream>

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

namespace arcticdb::storage {


Expand Down Expand Up @@ -91,5 +95,13 @@ struct is_key_type<entity::VariantKey> : std::true_type {};
template <typename T>
inline constexpr bool is_key_type_v = is_key_type<T>::value;

// Natively-typed storage settings, the counterpart of the protobuf-based
// arcticdb::proto::storage::VariantStorage. At present this is an alias for the S3 settings type.
using NativeVariantStorage = s3::S3Settings;
// Native storage settings looked up by storage id.
using NativeVariantStorageMap = std::map<std::string, NativeVariantStorage>; //key: storage_id
// Native storage maps looked up by environment name.
using EnvironmentNativeVariantStorageMap = std::map<std::string, NativeVariantStorageMap>; // key: env
} //namespace arcticdb::storage

// Mark these map types as opaque so that pybind11 does not apply its built-in STL caster, which would copy the containers.
// These macros must be placed at global scope (outside any namespace) and before any use of the types.
PYBIND11_MAKE_OPAQUE(arcticdb::storage::NativeVariantStorageMap)
PYBIND11_MAKE_OPAQUE(arcticdb::storage::EnvironmentNativeVariantStorageMap)

41 changes: 20 additions & 21 deletions cpp/arcticdb/storage/config_cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,22 +41,12 @@ class ConfigCache {
return descriptor_map_.find(path) != descriptor_map_.end();
}

// Registers a library together with every storage listed in its configuration.
//
// The library descriptor is decoded from lib_cfg.lib_desc() and added via add_library,
// then each entry of lib_cfg.storage_by_id() is forwarded to add_storage.
//
// path:    path under which the library descriptor is cached.
// lib_cfg: library configuration; taken by const reference so the protobuf message
//          is not copied on every call.
void add_library_config(const LibraryPath &path, const arcticdb::proto::storage::LibraryConfig& lib_cfg) {
    add_library(path, decode_library_descriptor(lib_cfg.lib_desc()));
    for (const auto& [storage_id, storage_cfg] : lib_cfg.storage_by_id()) {
        add_storage(StorageName{storage_id}, storage_cfg);
    }
}

// Adds a library descriptor to the backing resolver and to the local descriptor cache.
//
// The resolver is updated before mutex_ is taken; only the descriptor_map_ insertion
// runs under the lock. emplace leaves an already-cached descriptor for `path` untouched.
void add_library(const LibraryPath &path, const LibraryDescriptor &desc) {
    const auto& encoded_desc = encode_library_descriptor(desc);
    config_resolver_->add_library(environment_name_, encoded_desc);

    std::lock_guard<std::mutex> guard{mutex_};
    descriptor_map_.emplace(path, desc);
}

// Forwards a protobuf storage configuration to the resolver, under this cache's environment name.
// This call does not touch any member of the cache itself; it only delegates to config_resolver_.
void add_storage(const StorageName& storage_name, const arcticdb::proto::storage::VariantStorage& storage) {
config_resolver_->add_storage(environment_name_, storage_name, storage);
}

std::vector<LibraryPath> list_libraries(std::string_view prefix) {
std::lock_guard<std::mutex> lock{mutex_};
std::vector<LibraryPath> res;
Expand All @@ -82,17 +72,28 @@ class ConfigCache {
for (const auto& storage_name : descriptor.storage_ids_) {
// Otherwise see if we have the storage config.
arcticdb::proto::storage::VariantStorage storage_conf;
bool use_proto_variantstorage = false;
auto storage_conf_pos = storage_configs_.find(storage_name);
if(storage_conf_pos != storage_configs_.end())
if(storage_conf_pos != storage_configs_.end()){
storage_conf = storage_conf_pos->second;

use_proto_variantstorage = true;
}
// As a last resort, get the whole environment config from the resolver.
refresh_config();
storage_conf_pos = storage_configs_.find(storage_name);
if(storage_conf_pos != storage_configs_.end())
if(storage_conf_pos != storage_configs_.end()){
storage_conf = storage_conf_pos->second;

storages.emplace_back(create_storage(path, mode, storage_conf));
use_proto_variantstorage = true;
}

if (use_proto_variantstorage) {
storages.emplace_back(create_storage(path, mode, storage_conf));
}
else {
auto it = native_storage_configs_.find(storage_name);
util::check(it != native_storage_configs_.end(), "Storage config not found in native and s3 configs");
storages.emplace_back(create_storage(path, mode, it->second));
}
}
return std::make_shared<Storages>(std::move(storages), mode);
}
Expand All @@ -111,18 +112,16 @@ class ConfigCache {
for(auto& [storage_name, config] : storages) {
storage_configs_.try_emplace(StorageName(storage_name), config);
}
auto default_storages = config_resolver_->get_default_storages(environment_name_);
for(auto& [storage_name, config] : default_storages) {
if (storage_configs_.find(storage_name) == storage_configs_.end()) {
config_resolver_->add_storage(environment_name_, storage_name, config);
storage_configs_.try_emplace(StorageName(storage_name), config);
}
auto native_storages = config_resolver_->get_native_storages(environment_name_);
for(auto& [storage_name, config] : native_storages) {
native_storage_configs_.try_emplace(StorageName(storage_name), config);
}
}

EnvironmentName environment_name_;
std::unordered_map<LibraryPath, LibraryDescriptor> descriptor_map_;
std::unordered_map<StorageName, arcticdb::proto::storage::VariantStorage> storage_configs_;
std::unordered_map<StorageName, NativeVariantStorage> native_storage_configs_;
std::shared_ptr<ConfigResolver> config_resolver_;
mutable std::mutex mutex_;
};
Expand Down
15 changes: 15 additions & 0 deletions cpp/arcticdb/storage/config_resolvers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,16 @@ std::vector<std::pair<StorageName, arcticdb::proto::storage::VariantStorage>> In
return output;
}

// Returns a copy of every native storage registered for the given environment, as
// (storage name, settings) pairs in the iteration order of the environment's native_storages_ map.
std::vector<std::pair<StorageName, arcticdb::storage::NativeVariantStorage>> InMemoryConfigResolver::get_native_storages(const EnvironmentName &environment_name) const {
    using NativeStorageEntry = std::pair<StorageName, arcticdb::storage::NativeVariantStorage>;

    auto env_config = get_environment(environment_name);
    const auto& native_storages = env_config->native_storages_;

    // The range constructor copies each map entry, converting the const-keyed map pair into NativeStorageEntry.
    return std::vector<NativeStorageEntry>(native_storages.begin(), native_storages.end());
}

void InMemoryConfigResolver::add_library(const EnvironmentName& environment_name, const arcticdb::proto::storage::LibraryDescriptor& library_descriptor) {
auto& config = get_or_add_environment(environment_name);
config.libraries_.try_emplace(LibraryPath::from_delim_path(library_descriptor.name()), library_descriptor);
Expand All @@ -70,4 +80,9 @@ void InMemoryConfigResolver::add_storage(const EnvironmentName& environment_name
config.storages_.try_emplace(StorageName(storage_name), storage);
}

// Registers native storage settings under `storage_name` in the environment returned by
// get_or_add_environment. try_emplace keeps an existing entry with the same name unchanged.
void InMemoryConfigResolver::add_native_storage(const EnvironmentName& environment_name, const std::string& storage_name, const arcticdb::storage::NativeVariantStorage& storage) {
    auto& native_storages = get_or_add_environment(environment_name).native_storages_;
    native_storages.try_emplace(StorageName(storage_name), storage);
}

}
10 changes: 6 additions & 4 deletions cpp/arcticdb/storage/config_resolvers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,11 @@ class ConfigResolver {
//virtual std::vector<EnvironmentName> list_environments() const = 0;
virtual std::vector<std::pair<LibraryPath, arcticdb::proto::storage::LibraryDescriptor>> get_libraries(const EnvironmentName &environment_name) const = 0;
virtual std::vector<std::pair<StorageName, arcticdb::proto::storage::VariantStorage>> get_storages(const EnvironmentName &environment_name) const = 0;
virtual std::vector<std::pair<StorageName, arcticdb::storage::NativeVariantStorage>> get_native_storages(const EnvironmentName &environment_name) const = 0;
virtual void add_library(const EnvironmentName& environment_name, const arcticdb::proto::storage::LibraryDescriptor& library_descriptor) = 0;
virtual void add_storage(const EnvironmentName& environment_name, const StorageName& storage_name, const arcticdb::proto::storage::VariantStorage& storage) = 0;
virtual void add_native_storage(const EnvironmentName& environment_name, const std::string& storage_name, const arcticdb::storage::NativeVariantStorage& storage) = 0;
virtual void initialize_environment(const EnvironmentName& environment_name) = 0;
virtual std::vector<std::pair<StorageName, arcticdb::proto::storage::VariantStorage>> get_default_storages(const EnvironmentName& environment_name) const = 0;
virtual std::string_view resolver_type() const = 0;
};

Expand All @@ -40,10 +41,12 @@ namespace arcticdb::storage::details {
class InMemoryConfigResolver final : public ConfigResolver {
public:
typedef std::unordered_map<StorageName, arcticdb::proto::storage::VariantStorage> StorageMap;
typedef std::unordered_map<StorageName, NativeVariantStorage> NativeStorageMap;
typedef std::unordered_map<LibraryPath, arcticdb::proto::storage::LibraryDescriptor> LibraryMap;

struct MemoryConfig {
StorageMap storages_;
NativeStorageMap native_storages_;
LibraryMap libraries_;
};

Expand All @@ -59,12 +62,11 @@ class InMemoryConfigResolver final : public ConfigResolver {

std::vector<std::pair<LibraryPath, arcticdb::proto::storage::LibraryDescriptor>> get_libraries(const EnvironmentName &environment_name) const override;
std::vector<std::pair<StorageName, arcticdb::proto::storage::VariantStorage>> get_storages(const EnvironmentName &environment_name) const override;
std::vector<std::pair<StorageName, arcticdb::storage::NativeVariantStorage>> get_native_storages(const EnvironmentName &environment_name) const override;

void add_library(const EnvironmentName& environment_name, const arcticdb::proto::storage::LibraryDescriptor& library_descriptor) override;
void add_storage(const EnvironmentName& environment_name, const StorageName& storage_name, const arcticdb::proto::storage::VariantStorage& storage) override;
std::vector<std::pair<StorageName, arcticdb::proto::storage::VariantStorage>> get_default_storages(const EnvironmentName&) const override {
return std::vector<std::pair<StorageName, arcticdb::proto::storage::VariantStorage>>();
}
void add_native_storage(const EnvironmentName& environment_name, const std::string& storage_name, const arcticdb::storage::NativeVariantStorage& storage) override;

void initialize_environment(const EnvironmentName&) override { }
std::string_view resolver_type() const override { return "in_mem"; }
Expand Down
13 changes: 0 additions & 13 deletions cpp/arcticdb/storage/library_index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,6 @@ class LibraryIndex {
return library_cache_.find(path) != library_cache_.end() || config_cache_.library_exists(path);
}

// Creates a library from a full library configuration and opens it in WRITE mode.
//
// mutex_ is held for the whole call. Throws std::runtime_error when has_library(path)
// already reports the library. The UserAuth argument is accepted but not used.
std::shared_ptr<Library> add_library_config(const LibraryPath &path, const arcticdb::proto::storage::LibraryConfig &lib_cfg, const UserAuth &) {
    std::lock_guard<std::mutex> guard{mutex_};

    const bool already_present = has_library(path);
    if (already_present) {
        throw std::runtime_error(fmt::format("Can't create library {} when it already exists", path));
    }

    config_cache_.add_library_config(path, lib_cfg);
    return get_library_internal(path, OpenMode::WRITE);
}

std::shared_ptr<Library> get_library(const LibraryPath &path, OpenMode mode, const UserAuth &) {
std::lock_guard<std::mutex> lock{mutex_};
auto res = library_cache_.find(path);
Expand All @@ -50,10 +41,6 @@ class LibraryIndex {
return get_library_internal(path, mode);
}

// Forwards a protobuf storage configuration to the config cache.
// Unlike the other public methods visible here, this one does not take mutex_ itself.
void add_storage(const StorageName& storage_name, const arcticdb::proto::storage::VariantStorage& storage) {
config_cache_.add_storage(storage_name, storage);
}

private:
std::shared_ptr<Library> get_library_internal(const LibraryPath &path, OpenMode mode) {
auto desc = config_cache_.get_descriptor(path);
Expand Down
81 changes: 78 additions & 3 deletions cpp/arcticdb/storage/python_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <arcticdb/storage/library_index.hpp>
#include <arcticdb/storage/config_resolvers.hpp>
#include <arcticdb/storage/constants.hpp>
#include <arcticdb/storage/s3/s3_storage.hpp>
#include <arcticdb/storage/s3/s3_settings.hpp>

namespace py = pybind11;

Expand Down Expand Up @@ -95,21 +97,94 @@ void register_bindings(py::module& storage, py::exception<arcticdb::ArcticExcept

storage.def("create_library_index", &create_library_index);

storage.def("create_mem_config_resolver", [](const py::object & env_config_map_py) -> std::shared_ptr<ConfigResolver> {

// Expose s3::AWSAuthMethod to Python as `AWSAuthMethod`, with one Python value per C++ enumerator listed below.
py::enum_<s3::AWSAuthMethod>(storage, "AWSAuthMethod")
.value("DISABLED", s3::AWSAuthMethod::DISABLED)
.value("DEFAULT_CREDENTIALS_PROVIDER_CHAIN", s3::AWSAuthMethod::DEFAULT_CREDENTIALS_PROVIDER_CHAIN)
.value("STS_PROFILE_CREDENTIALS_PROVIDER", s3::AWSAuthMethod::STS_PROFILE_CREDENTIALS_PROVIDER);

// Expose s3::S3Settings to Python as `S3Settings`.
// The class is default-constructible from Python, and every setting is a read/write property
// backed by the matching getter and set_* member function of s3::S3Settings.
auto s3settings_def = py::class_<s3::S3Settings>(storage, "S3Settings")
.def(py::init<>())
.def_property("bucket_name", &s3::S3Settings::bucket_name, &s3::S3Settings::set_bucket_name)
.def_property("credential_name", &s3::S3Settings::credential_name, &s3::S3Settings::set_credential_name)
.def_property("credential_key", &s3::S3Settings::credential_key, &s3::S3Settings::set_credential_key)
.def_property("endpoint", &
s3::S3Settings::endpoint, &s3::S3Settings::set_endpoint)
.def_property("max_connections", &s3::S3Settings::max_connections, &s3::S3Settings::set_max_connections)
.def_property("connect_timeout", &s3::S3Settings::connect_timeout, &s3::S3Settings::set_connect_timeout)
.def_property("request_timeout", &s3::S3Settings::request_timeout, &s3::S3Settings::set_request_timeout)
.def_property("ssl", &s3::S3Settings::ssl, &s3::S3Settings::set_ssl)
.def_property("prefix", &s3::S3Settings::prefix, &s3::S3Settings::set_prefix)
.def_property("https", &s3::S3Settings::https, &s3::S3Settings::set_https)
.def_property("region", &s3::S3Settings::region, &s3::S3Settings::set_region)
.def_property("use_virtual_addressing", &s3::S3Settings::use_virtual_addressing, &s3::S3Settings::set_use_virtual_addressing)
.def_property("use_mock_storage_for_testing", &s3::S3Settings::use_mock_storage_for_testing, &s3::S3Settings::set_use_mock_storage_for_testing)
.def_property("ca_cert_path", &s3::S3Settings::ca_cert_path, &s3::S3Settings::set_ca_cert_path)
.def_property("ca_cert_dir", &s3::S3Settings::ca_cert_dir, &s3::S3Settings::set_ca_cert_dir)
.def_property("use_raw_prefix", &s3::S3Settings::use_raw_prefix, &s3::S3Settings::set_use_raw_prefix)
// Authentication method (an s3::AWSAuthMethod value) and the AWS profile name setting.
.def_property("aws_auth", &s3::S3Settings::aws_auth, &s3::S3Settings::set_aws_auth)
.def_property("aws_profile", &s3::S3Settings::aws_profile, &s3::S3Settings::set_aws_profile);


// Expose NativeVariantStorageMap (storage id -> native storage settings) to Python as an opaque,
// dict-like class supporting item get/set, `in`, and values().
py::class_<NativeVariantStorageMap>(storage, "NativeVariantStorageMap")
    .def(py::init<>())
    // Returns a reference tied to the lifetime of the map object (reference_internal).
    // NOTE: std::map::operator[] default-constructs and inserts an entry when storage_id is absent,
    // so looking up a missing key from Python adds it instead of raising KeyError.
    .def("__getitem__", [](NativeVariantStorageMap &storage_map, const std::string &storage_id) -> NativeVariantStorage& {
        return storage_map[storage_id];
    }, py::return_value_policy::reference_internal)
    // Inserts a new entry or overwrites the existing one with a copy of `storage`.
    .def("__setitem__", [](NativeVariantStorageMap &storage_map, const std::string &storage_id, const NativeVariantStorage &storage) {
        storage_map[storage_id] = storage;
    })
    .def("__contains__", [](const NativeVariantStorageMap &storage_map, const std::string &storage_id) {
        return storage_map.count(storage_id) != 0;
    })
    // Returns copies of all stored settings, in key order of the underlying std::map.
    .def("values", [](const NativeVariantStorageMap &storage_map) {
        std::vector<NativeVariantStorageMap::mapped_type> res;
        // The final size is known up front, so allocate once instead of growing repeatedly.
        res.reserve(storage_map.size());
        for (const auto & [_, storage] : storage_map) {
            res.push_back(storage);
        }
        return res;
    });

// Expose EnvironmentNativeVariantStorageMap (environment name -> NativeVariantStorageMap) to Python
// as an opaque class supporting item get/set and `in`.
py::class_<EnvironmentNativeVariantStorageMap>(storage, "EnvironmentNativeVariantStorageMap")
    .def(py::init<>())
    // operator[] creates an empty per-environment map when env_name is not present yet;
    // the returned reference is kept alive together with the parent object.
    .def("__getitem__", [](EnvironmentNativeVariantStorageMap &self, const std::string &env_name) -> NativeVariantStorageMap& {
        return self[env_name];
    }, py::return_value_policy::reference_internal)
    // Stores a copy of `value`, replacing any map already registered for env_name.
    .def("__setitem__", [](EnvironmentNativeVariantStorageMap &self, const std::string &env_name, const NativeVariantStorageMap &value) {
        self[env_name] = value;
    })
    .def("__contains__", [](const EnvironmentNativeVariantStorageMap &self, const std::string &env_name) {
        return self.find(env_name) != self.end();
    });


storage.def("create_mem_config_resolver", [](const py::object & env_config_map_py, const std::optional<EnvironmentNativeVariantStorageMap>& native_env_storage_map) -> std::shared_ptr<ConfigResolver> {
arcticdb::proto::storage::EnvironmentConfigsMap ecm;
pb_from_python(env_config_map_py, ecm);
auto resolver = std::make_shared<storage::details::InMemoryConfigResolver>();
for(auto &[env, cfg] :ecm.env_by_id()){
EnvironmentName env_name{env};
for(auto &[id, variant_storage]: cfg.storage_by_id()){
resolver->add_storage(env_name, StorageName{id}, variant_storage);
if (variant_storage.ByteSizeLong()) {
resolver->add_storage(env_name, StorageName{id}, variant_storage);
}
}
for(auto &[id, lib_desc]: cfg.lib_by_path()){
resolver->add_library(env_name, lib_desc);
}
}
if (native_env_storage_map) {
for (const auto& [env, native_storage_map] : native_env_storage_map.value()) {
EnvironmentName env_name{env};
for (const auto& [id, native_storage] : native_storage_map) {
resolver->add_native_storage(env_name, id, native_storage);
}
}
}
return resolver;
});
},
py::arg("env_config_map"),
py::arg("native_env_storage_map") = std::nullopt);

// Expose ConfigResolver to Python as a handle held by std::shared_ptr; this statement binds no constructors or methods.
py::class_<ConfigResolver, std::shared_ptr<ConfigResolver>>(storage, "ConfigResolver");

Expand Down
Loading
Loading