Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Make automatic use of Azure storage account keys opt-in #20652

Merged
merged 3 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 0 additions & 112 deletions crates/polars-io/src/cloud/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,6 @@ impl CloudOptions {
use super::credential_provider::IntoCredentialProvider;

let verbose = polars_core::config::verbose();
let mut storage_account: Option<polars_utils::pl_str::PlSmallStr> = None;

// The credential provider `self.credentials` is prioritized if it is set. We also need
// `from_env()` as it may source environment configured storage account name.
Expand All @@ -412,9 +411,6 @@ impl CloudOptions {
panic!("impl error: cloud type mismatch")
};
for (key, value) in options.iter() {
if key == &AzureConfigKey::AccountName {
storage_account = Some(value.into());
}
builder = builder.with_config(*key, value);
}
}
Expand All @@ -432,22 +428,7 @@ impl CloudOptions {
);
}
builder.with_credentials(v.into_azure_provider())
} else if let Some(v) = extract_adls_uri_storage_account(url) // Prefer the one embedded in the path
.map(|x| x.into())
.or(storage_account)
.as_deref()
.and_then(get_azure_storage_account_key)
{
if verbose {
eprintln!("[CloudOptions::build_azure]: Retrieved account key from Azure CLI")
}
builder.with_access_key(v)
} else {
if verbose {
eprintln!(
"[CloudOptions::build_azure]: Could not retrieve account key from Azure CLI"
)
}
builder
};

Expand Down Expand Up @@ -630,99 +611,6 @@ impl CloudOptions {
}
}

/// ```text
/// "abfss://{CONTAINER}@{STORAGE_ACCOUNT}.dfs.core.windows.net/"
/// ^^^^^^^^^^^^^^^^^
/// ```
#[cfg(feature = "azure")]
///
/// Extracts the storage account name from an ADLS URI.
///
/// The URI is split into scheme, authority and path. The authority must have the
/// form `{CONTAINER}@{STORAGE_ACCOUNT}.dfs.core.windows.net`; the text between the
/// `@` and the first `.dfs.core.windows.net` is returned.
///
/// A URI with no path component (no `/` after the authority) is accepted as well,
/// e.g. `abfss://container@account.dfs.core.windows.net`.
///
/// Returns `None` when the input has no `://` separator, the authority has no `@`,
/// or the host does not contain `.dfs.core.windows.net`.
fn extract_adls_uri_storage_account(path: &str) -> Option<&str> {
    let (_scheme, after_scheme) = path.split_once("://")?;

    // The authority ends at the first '/'. When the URI has no path at all the
    // whole remainder is the authority, so do not bail out on a missing '/'.
    let authority = after_scheme
        .split_once('/')
        .map_or(after_scheme, |(authority, _path)| authority);

    let (_container, host) = authority.split_once('@')?;
    let (storage_account, _suffix) = host.split_once(".dfs.core.windows.net")?;

    Some(storage_account)
}

/// Attempt to retrieve the storage account key for this account using the Azure CLI.
#[cfg(feature = "azure")]
///
/// Runs `az storage account keys list --output json --account-name <account_name>`
/// (through `cmd /C` on Windows) and returns the `value` field of the first entry
/// in the JSON array printed on stdout.
///
/// Returns `None` if the command cannot be run, exits with a non-success status,
/// prints non-UTF-8 output, prints JSON that does not deserialize into a list of
/// objects with a string `value` field, or prints an empty list.
fn get_azure_storage_account_key(account_name: &str) -> Option<String> {
    /// One element of the JSON array printed by the Azure CLI; only `value` is read.
    #[derive(Debug, serde::Deserialize)]
    struct AccountKeyEntry {
        value: String,
    }

    // Arguments shared by every platform; the account name is appended after these.
    const AZ_KEYS_LIST_ARGS: [&str; 7] = [
        "storage",
        "account",
        "keys",
        "list",
        "--output",
        "json",
        "--account-name",
    ];

    if polars_core::config::verbose() {
        eprintln!(
            "get_azure_storage_account_key: storage_account_name: {}",
            account_name
        );
    }

    let mut command = if cfg!(target_family = "windows") {
        // On Windows `az` is launched through `cmd /C`, mirroring object_store:
        // https://github.com/apache/arrow-rs/blob/565c24b8071269b02c3937e34c51eacf0f4cbad6/object_store/src/azure/credential.rs#L877-L894
        let mut shell = std::process::Command::new("cmd");
        shell.args(["/C", "az"]);
        shell
    } else {
        std::process::Command::new("az")
    };
    command.args(AZ_KEYS_LIST_ARGS).arg(account_name);

    let output = command.output().ok()?;
    if !output.status.success() {
        return None;
    }
    let stdout_text = String::from_utf8(output.stdout).ok()?;

    // Expected shape of the CLI response:
    //
    // [
    //     {
    //         "creationTime": "1970-01-01T00:00:00.000000+00:00",
    //         "keyName": "key1",
    //         "permissions": "FULL",
    //         "value": "..."
    //     },
    //     {
    //         "creationTime": "1970-01-01T00:00:00.000000+00:00",
    //         "keyName": "key2",
    //         "permissions": "FULL",
    //         "value": "..."
    //     }
    // ]
    let entries: Vec<AccountKeyEntry> = serde_json::from_str(&stdout_text).ok()?;

    // Only the first listed key is used.
    entries.into_iter().next().map(|entry| entry.value)
}

#[cfg(feature = "cloud")]
#[cfg(test)]
mod tests {
Expand Down
30 changes: 20 additions & 10 deletions py-polars/polars/io/cloud/credential_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,8 @@ def __init__(
self,
*,
scopes: list[str] | None = None,
storage_account: str | None = None,
tenant_id: str | None = None,
_storage_account: str | None = None,
_verbose: bool = False,
) -> None:
"""
Expand All @@ -169,11 +169,6 @@ def __init__(
----------
scopes
Scopes to pass to `get_token`
storage_account
If specified, an attempt will be made to retrieve the account keys
for this account using the Azure CLI. If this is successful, the
account keys will be used instead of
`DefaultAzureCredential.get_token()`
tenant_id
Azure tenant ID.
"""
Expand All @@ -182,7 +177,7 @@ def __init__(

self._check_module_availability()

self.account_name = storage_account
self.account_name = _storage_account
self.tenant_id = tenant_id
# Done like this to bypass mypy, we don't have stubs for azure.identity
self.credential = importlib.import_module("azure.identity").__dict__[
Expand All @@ -196,7 +191,7 @@ def __init__(
if self._verbose:
print(
ritchie46 marked this conversation as resolved.
Show resolved Hide resolved
(
"CredentialProviderAzure "
"[CredentialProviderAzure]: "
f"{self.account_name = } "
f"{self.tenant_id = } "
f"{self.scopes = } "
Expand All @@ -206,7 +201,22 @@ def __init__(

def __call__(self) -> CredentialProviderFunctionReturn:
"""Fetch the credentials."""
if self.account_name is not None:
POLARS_AUTO_USE_AZURE_STORAGE_ACCOUNT_KEY = os.getenv(
"POLARS_AUTO_USE_AZURE_STORAGE_ACCOUNT_KEY"
)

if self._verbose:
print(
"[CredentialProviderAzure]: "
f"{self.account_name = } "
f"{POLARS_AUTO_USE_AZURE_STORAGE_ACCOUNT_KEY = }",
file=sys.stderr,
)

if (
self.account_name is not None
and POLARS_AUTO_USE_AZURE_STORAGE_ACCOUNT_KEY == "1"
):
try:
creds = {
"account_key": self._get_azure_storage_account_key_az_cli(
Expand Down Expand Up @@ -453,9 +463,9 @@ def _maybe_init_credential_provider(
)

provider = CredentialProviderAzure(
storage_account=storage_account,
tenant_id=tenant_id,
_verbose=verbose,
_storage_account=storage_account,
)
elif storage_options is not None:
return None
Expand Down
Loading