From 07991443a7805840aa1c4783b13964c2a53c8700 Mon Sep 17 00:00:00 2001 From: Sam Ansmink Date: Thu, 31 Aug 2023 17:22:15 +0200 Subject: [PATCH] also fetch region, update duckdb --- .github/workflows/MinioTests.yml | 1 - README.md | 44 ++++++++++---- duckdb | 2 +- scripts/create_minio_credential_file.sh | 24 +++++++- src/aws_extension.cpp | 80 +++++++++++++++++++++---- test/sql/aws_env_var.test | 19 +++--- test/sql/aws_errors.test | 10 +--- test/sql/aws_minio.test | 49 +++++++++++---- 8 files changed, 168 insertions(+), 61 deletions(-) diff --git a/.github/workflows/MinioTests.yml b/.github/workflows/MinioTests.yml index 7d21920..5ebd4e0 100644 --- a/.github/workflows/MinioTests.yml +++ b/.github/workflows/MinioTests.yml @@ -16,7 +16,6 @@ jobs: duckdb_version: [ '' ] env: S3_TEST_SERVER_AVAILABLE: 1 - AWS_DEFAULT_REGION: eu-west-1 DUCKDB_S3_ENDPOINT: duckdb-minio.com:9000 DUCKDB_S3_USE_SSL: false GEN: ninja diff --git a/README.md b/README.md index c5181e6..683ee8b 100644 --- a/README.md +++ b/README.md @@ -18,22 +18,42 @@ The extension is tested & distributed for Linux (x64), MacOS (x64, arm64) and Wi | `load_aws_credentials` | Pragma call function | Automatically loads the AWS credentials through the Default AWS Credentials Provider Chain | -## Examples +## Usage ### Load AWS Credentials -Input: +Firstly ensure the `aws` and `httpfs` extensions are loaded and installed: ```sql -D load aws; -D load httpfs; -D CALL load_aws_credentials() +D install aws; load aws; install httpfs; load httpfs; ``` -Result: +Then to load the aws credentials run: ```sql D call load_aws_credentials(); -┌──────────────────────┐ -│ loaded_key │ -│ varchar │ -├──────────────────────┤ -│ AKIAIOSFODNN7EXAMPLE │ -└──────────────────────┘ +┌─────────────────────────┬──────────────────────────┬──────────────────────┬───────────────┐ +│ loaded_access_key_id │ loaded_secret_access_key │ loaded_session_token │ loaded_region │ +│ varchar │ varchar │ varchar │ varchar │ +├─────────────────────────┼──────────────────────────┼──────────────────────┼───────────────┤ +│ AKIAIOSFODNN7EXAMPLE │ │ │ eu-west-1 │ +└─────────────────────────┴──────────────────────────┴──────────────────────┴───────────────┘ +``` +The function takes a string parameter to specify a specific profile: +```sql +D call load_aws_credentials('minio-testing-2'); +┌──────────────────────┬──────────────────────────┬──────────────────────┬───────────────┐ +│ loaded_access_key_id │ loaded_secret_access_key │ loaded_session_token │ loaded_region │ +│ varchar │ varchar │ varchar │ varchar │ +├──────────────────────┼──────────────────────────┼──────────────────────┼───────────────┤ +│ minio_duckdb_user_2 │ │ │ eu-west-2 │ +└──────────────────────┴──────────────────────────┴──────────────────────┴───────────────┘ ``` + +There are several parameters to tweak the behaviour of the call: +```sql +D call load_aws_credentials('minio-testing-2', set_region=false, redact_secret=false); +┌──────────────────────┬──────────────────────────────┬──────────────────────┬───────────────┐ +│ loaded_access_key_id │ loaded_secret_access_key │ loaded_session_token │ loaded_region │ +│ varchar │ varchar │ varchar │ varchar │ +├──────────────────────┼──────────────────────────────┼──────────────────────┼───────────────┤ +│ minio_duckdb_user_2 │ minio_duckdb_user_password_2 │ │ │ +└──────────────────────┴──────────────────────────────┴──────────────────────┴───────────────┘ + +``` \ No newline at end of file diff --git a/duckdb b/duckdb index acbbfe0..2469687 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit acbbfe0e79fdfef712a8dc317af407c12006814f +Subproject commit 2469687513a8fa5be8d865edce9b6df0a0e5b7af diff --git a/scripts/create_minio_credential_file.sh b/scripts/create_minio_credential_file.sh index e97ab22..92ed7e0 100755 --- a/scripts/create_minio_credential_file.sh +++ b/scripts/create_minio_credential_file.sh @@ -4,11 +4,14 @@ # Set the file path for the credentials file credentials_file=~/.aws/credentials -# create dir if not already existend +# Set the file path for the config file +config_file=~/.aws/config + +# create dir if not already exists mkdir -p ~/.aws # Create the credentials configuration -credentials_config="[default] +credentials_str="[default] aws_access_key_id=minio_duckdb_user aws_secret_access_key=minio_duckdb_user_password @@ -19,7 +22,22 @@ aws_secret_access_key=minio_duckdb_user_password_2 [minio-testing-invalid] aws_access_key_id=minio_duckdb_user_invalid aws_secret_access_key=thispasswordiscompletelywrong +aws_session_token=completelybogussessiontoken " # Write the credentials configuration to the file -echo "$credentials_config" > "$credentials_file" \ No newline at end of file +echo "$credentials_str" > "$credentials_file" + +# Create the credentials configuration +config_str="[default] +region=eu-west-1 + +[profile minio-testing-2] +region=eu-west-2 + +[profile minio-testing-invalid] +region=the-moon-123 +" + +# Write the config to the file +echo "$config_str" > "$config_file" \ No newline at end of file diff --git a/src/aws_extension.cpp b/src/aws_extension.cpp index e531792..e1467a2 100644 --- a/src/aws_extension.cpp +++ b/src/aws_extension.cpp @@ -7,12 +7,19 @@ #include #include #include -#include +#include namespace duckdb { +struct SetCredentialsResult { + string set_access_key_id; + string set_secret_access_key; + string set_session_token; + string set_region; +}; + //! Set the DuckDB AWS Credentials using the DefaultAWSCredentialsProviderChain -static string TrySetAwsCredentials(DBConfig& config, const string& profile) { +static SetCredentialsResult TrySetAwsCredentials(DBConfig& config, const string& profile, bool set_region) { Aws::SDKOptions options; Aws::InitAPI(options); Aws::Auth::AWSCredentials credentials; @@ -27,12 +34,26 @@ static string TrySetAwsCredentials(DBConfig& config, const string& profile) { credentials = provider.GetAWSCredentials(); } - string ret; + auto s3_config = Aws::Client::ClientConfiguration(profile.c_str()); + auto region = s3_config.region; + + // TODO: We would also like to get the endpoint here, but it's currently not supported by the AWS SDK: + // https://github.com/aws/aws-sdk-cpp/issues/2587 + + + SetCredentialsResult ret; if (!credentials.IsExpiredOrEmpty()) { config.SetOption("s3_access_key_id", Value(credentials.GetAWSAccessKeyId())); config.SetOption("s3_secret_access_key", Value(credentials.GetAWSSecretKey())); config.SetOption("s3_session_token", Value(credentials.GetSessionToken())); - ret = credentials.GetAWSAccessKeyId(); + ret.set_access_key_id = credentials.GetAWSAccessKeyId(); + ret.set_secret_access_key = credentials.GetAWSSecretKey(); + ret.set_session_token = credentials.GetSessionToken(); + } + + if (!region.empty() && set_region) { + config.SetOption("s3_region", Value(region)); + ret.set_region = region; } Aws::ShutdownAPI(options); @@ -42,18 +63,38 @@ static string TrySetAwsCredentials(DBConfig& config, const string& profile) { struct SetAWSCredentialsFunctionData : public TableFunctionData { string profile_name; bool finished = false; + bool set_region = true; + bool redact_secret = true; }; static unique_ptr LoadAWSCredentialsBind(ClientContext &context, TableFunctionBindInput &input, vector &return_types, vector &names) { auto result = make_uniq(); + for (const auto& option : input.named_parameters) { + if (option.first == "set_region") { + result->set_region = BooleanValue::Get(option.second); + } else if (option.first == "redact_secret") { + result->redact_secret = BooleanValue::Get(option.second); + } + } + if (input.inputs.size() >= 1) { result->profile_name = input.inputs[0].ToString(); } return_types.emplace_back(LogicalType::VARCHAR); - names.emplace_back("loaded_key"); + names.emplace_back("loaded_access_key_id"); + + return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("loaded_secret_access_key"); + + return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("loaded_session_token"); + + return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("loaded_region"); + return std::move(result); } @@ -67,10 +108,18 @@ static void LoadAWSCredentialsFun(ClientContext &context, TableFunctionInput &da throw MissingExtensionException("httpfs extension is required for load_aws_credentials"); } - //! Return the Key ID of the key we found, or NULL if none was found - auto key_loaded = TrySetAwsCredentials(DBConfig::GetConfig(context), data.profile_name); - auto ret_val = !key_loaded.empty() ? Value(key_loaded) : Value(nullptr); - output.SetValue(0,0,ret_val); + auto load_result = TrySetAwsCredentials(DBConfig::GetConfig(context), data.profile_name, data.set_region); + + // Set return values for all modified params + output.SetValue(0,0, load_result.set_access_key_id.empty() ? Value(nullptr) : load_result.set_access_key_id); + if (data.redact_secret && !load_result.set_secret_access_key.empty()) { + output.SetValue(1,0,""); + } else { + output.SetValue(1,0,load_result.set_secret_access_key.empty() ? Value(nullptr) : load_result.set_secret_access_key); + } + output.SetValue(2,0,load_result.set_session_token.empty() ? Value(nullptr) : load_result.set_session_token); + output.SetValue(3,0,load_result.set_region.empty() ? Value(nullptr) : load_result.set_region); + output.SetCardinality(1); data.finished = true; @@ -78,8 +127,17 @@ static void LoadAWSCredentialsFun(ClientContext &context, TableFunctionInput &da static void LoadInternal(DuckDB &db) { TableFunctionSet function_set("load_aws_credentials"); - function_set.AddFunction(TableFunction("load_aws_credentials", {}, LoadAWSCredentialsFun, LoadAWSCredentialsBind)); - function_set.AddFunction(TableFunction("load_aws_credentials", {LogicalTypeId::VARCHAR}, LoadAWSCredentialsFun, LoadAWSCredentialsBind)); + auto base_fun = TableFunction("load_aws_credentials", {}, LoadAWSCredentialsFun, LoadAWSCredentialsBind); + auto profile_fun = TableFunction("load_aws_credentials", {LogicalTypeId::VARCHAR}, LoadAWSCredentialsFun, LoadAWSCredentialsBind); + + base_fun.named_parameters["set_region"] = LogicalTypeId::BOOLEAN; + base_fun.named_parameters["redact_secret"] = LogicalTypeId::BOOLEAN; + profile_fun.named_parameters["set_region"] = LogicalTypeId::BOOLEAN; + profile_fun.named_parameters["redact_secret"] = LogicalTypeId::BOOLEAN; + + function_set.AddFunction(base_fun); + function_set.AddFunction(profile_fun); + ExtensionUtil::RegisterFunction(*db.instance, function_set); } diff --git a/test/sql/aws_env_var.test b/test/sql/aws_env_var.test index 691878d..8a52443 100644 --- a/test/sql/aws_env_var.test +++ b/test/sql/aws_env_var.test @@ -10,10 +10,8 @@ require-env AWS_ACCESS_KEY_ID require-env AWS_SECRET_ACCESS_KEY -query I +statement ok CALL load_aws_credentials(); ----- -minio_duckdb_user query I select value from duckdb_settings() where name='s3_secret_access_key'; @@ -25,20 +23,17 @@ select value from duckdb_settings() where name='s3_access_key_id'; ---- minio_duckdb_user -# Trying to access a profile that doesn't exist should return NULL and not change anything -query I +statement ok +set s3_access_key_id='bogus'; + +statement ok CALL load_aws_credentials('profile-doesnt-exists-altogether'); ----- -NULL -# Same for passing null as a profile, it does nothing. -query I +statement ok CALL load_aws_credentials(NULL); ----- -NULL # Key is untouched query I select value from duckdb_settings() where name='s3_access_key_id'; ---- -minio_duckdb_user \ No newline at end of file +bogus \ No newline at end of file diff --git a/test/sql/aws_errors.test b/test/sql/aws_errors.test index 733bbf7..960627f 100644 --- a/test/sql/aws_errors.test +++ b/test/sql/aws_errors.test @@ -19,11 +19,5 @@ httpfs extension is required for load_aws_credentials require httpfs -require-env AWS_ACCESS_KEY_ID - -require-env AWS_SECRET_ACCESS_KEY - -query I -CALL load_aws_credentials(); ----- -minio_duckdb_user \ No newline at end of file +statement ok +CALL load_aws_credentials(); \ No newline at end of file diff --git a/test/sql/aws_minio.test b/test/sql/aws_minio.test index 8002f59..1fbb7db 100644 --- a/test/sql/aws_minio.test +++ b/test/sql/aws_minio.test @@ -17,10 +17,10 @@ require-env S3_TEST_SERVER_AVAILABLE 1 set ignore_error_messages # Without params, this will use the DefaultAWSCredentialsProviderChain (https://sdk.amazonaws.com/cpp/api/LATEST/root/html/md_docs_2_credentials___providers.html) -query I +query IIII CALL load_aws_credentials(); ---- -minio_duckdb_user +minio_duckdb_user NULL eu-west-1 query I select value from duckdb_settings() where name='s3_secret_access_key'; @@ -33,20 +33,32 @@ select value from duckdb_settings() where name='s3_access_key_id'; minio_duckdb_user # You can specify which config profile to use, this uses the ProfileConfigFileAWSCredentialsProvider directly -query I +query IIII CALL load_aws_credentials('minio-testing-2'); ---- -minio_duckdb_user_2 +minio_duckdb_user_2 NULL eu-west-2 + +# You can disable secret redaction to make load_aws_credentials print the secret key +query IIII +CALL load_aws_credentials(redact_secret=false); +---- +minio_duckdb_user minio_duckdb_user_password NULL eu-west-1 + +# You can also skip loading the region to only set the main credentials +query IIII +CALL load_aws_credentials(set_region=false); +---- +minio_duckdb_user NULL NULL query I select value from duckdb_settings() where name='s3_secret_access_key'; ---- -minio_duckdb_user_password_2 +minio_duckdb_user_password query I select value from duckdb_settings() where name='s3_access_key_id'; ---- -minio_duckdb_user_2 +minio_duckdb_user statement ok CALL load_aws_credentials(); @@ -60,11 +72,22 @@ SELECT * FROM 's3://test-bucket/test_basic/test.csv'; 123 # Now when we select a failing profile, the query should fail -query I +query IIII CALL load_aws_credentials('minio-testing-invalid'); ---- -minio_duckdb_user_invalid +minio_duckdb_user_invalid completelybogussessiontoken the-moon-123 +# Malformed region: throws 400 +statement error +SELECT * FROM 's3://test-bucket/test_basic/test.csv'; +---- +HTTP 400 + +# reset region +statement ok +set s3_region='eu-west-1'; + +# now http 403 is thrown for invalid credentials statement error SELECT * FROM 's3://test-bucket/test_basic/test.csv'; ---- @@ -79,16 +102,16 @@ SELECT * FROM 's3://test-bucket/test_basic/test.csv'; ---- 123 -# Trying to access a profile that doesn't exist should return NULL and not change anything -query I +# Trying to access a profile that doesn't exist will load the default profile +query IIII CALL load_aws_credentials('profile-doesnt-exists-altogether'); ---- -NULL +NULL NULL NULL eu-west-1 -query I +query IIII CALL load_aws_credentials(NULL); ---- -NULL +NULL NULL NULL eu-west-1 # Key is untouched query I