From bea1a83ca27e36cafaf2ed965b33b828ef6924df Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Mon, 13 Jan 2025 14:49:17 +1100 Subject: [PATCH 1/3] c --- crates/polars-io/src/cloud/options.rs | 112 ------------------ .../polars/io/cloud/credential_provider.py | 92 +------------- 2 files changed, 6 insertions(+), 198 deletions(-) diff --git a/crates/polars-io/src/cloud/options.rs b/crates/polars-io/src/cloud/options.rs index 739a7009916c..8738b927732f 100644 --- a/crates/polars-io/src/cloud/options.rs +++ b/crates/polars-io/src/cloud/options.rs @@ -401,7 +401,6 @@ impl CloudOptions { use super::credential_provider::IntoCredentialProvider; let verbose = polars_core::config::verbose(); - let mut storage_account: Option = None; // The credential provider `self.credentials` is prioritized if it is set. We also need // `from_env()` as it may source environment configured storage account name. @@ -412,9 +411,6 @@ impl CloudOptions { panic!("impl error: cloud type mismatch") }; for (key, value) in options.iter() { - if key == &AzureConfigKey::AccountName { - storage_account = Some(value.into()); - } builder = builder.with_config(*key, value); } } @@ -432,22 +428,7 @@ impl CloudOptions { ); } builder.with_credentials(v.into_azure_provider()) - } else if let Some(v) = extract_adls_uri_storage_account(url) // Prefer the one embedded in the path - .map(|x| x.into()) - .or(storage_account) - .as_deref() - .and_then(get_azure_storage_account_key) - { - if verbose { - eprintln!("[CloudOptions::build_azure]: Retrieved account key from Azure CLI") - } - builder.with_access_key(v) } else { - if verbose { - eprintln!( - "[CloudOptions::build_azure]: Could not retrieve account key from Azure CLI" - ) - } builder }; @@ -630,99 +611,6 @@ impl CloudOptions { } } -/// ```text -/// "abfss://{CONTAINER}@{STORAGE_ACCOUNT}.dfs.core.windows.net/" -/// ^^^^^^^^^^^^^^^^^ -/// ``` -#[cfg(feature = "azure")] -fn extract_adls_uri_storage_account(path: &str) -> Option<&str> { - Some( - path.split_once("://")? - .1 - .split_once('/')? - .0 - .split_once('@')? - .1 - .split_once(".dfs.core.windows.net")? - .0, - ) -} - -/// Attempt to retrieve the storage account key for this account using the Azure CLI. -#[cfg(feature = "azure")] -fn get_azure_storage_account_key(account_name: &str) -> Option { - if polars_core::config::verbose() { - eprintln!( - "get_azure_storage_account_key: storage_account_name: {}", - account_name - ); - } - - let mut cmd = if cfg!(target_family = "windows") { - // https://github.com/apache/arrow-rs/blob/565c24b8071269b02c3937e34c51eacf0f4cbad6/object_store/src/azure/credential.rs#L877-L894 - let mut v = std::process::Command::new("cmd"); - v.args([ - "/C", - "az", - "storage", - "account", - "keys", - "list", - "--output", - "json", - "--account-name", - account_name, - ]); - v - } else { - let mut v = std::process::Command::new("az"); - v.args([ - "storage", - "account", - "keys", - "list", - "--output", - "json", - "--account-name", - account_name, - ]); - v - }; - - let json_resp = cmd - .output() - .ok() - .filter(|x| x.status.success()) - .map(|x| String::from_utf8(x.stdout))? - .ok()?; - - // [ - // { - // "creationTime": "1970-01-01T00:00:00.000000+00:00", - // "keyName": "key1", - // "permissions": "FULL", - // "value": "..." - // }, - // { - // "creationTime": "1970-01-01T00:00:00.000000+00:00", - // "keyName": "key2", - // "permissions": "FULL", - // "value": "..." - // } - // ] - - #[derive(Debug, serde::Deserialize)] - struct S { - value: String, - } - - let resp: Vec = serde_json::from_str(&json_resp).ok()?; - - let access_key = resp.into_iter().next()?.value; - - Some(access_key) -} - #[cfg(feature = "cloud")] #[cfg(test)] mod tests { diff --git a/py-polars/polars/io/cloud/credential_provider.py b/py-polars/polars/io/cloud/credential_provider.py index c515e3450d08..174206660fa5 100644 --- a/py-polars/polars/io/cloud/credential_provider.py +++ b/py-polars/polars/io/cloud/credential_provider.py @@ -156,7 +156,6 @@ def __init__( self, *, scopes: list[str] | None = None, - storage_account: str | None = None, tenant_id: str | None = None, _verbose: bool = False, ) -> None: @@ -169,11 +168,6 @@ def __init__( ---------- scopes Scopes to pass to `get_token` - storage_account - If specified, an attempt will be made to retrieve the account keys - for this account using the Azure CLI. If this is successful, the - account keys will be used instead of - `DefaultAzureCredential.get_token()` tenant_id Azure tenant ID. """ @@ -182,7 +176,6 @@ def __init__( self._check_module_availability() - self.account_name = storage_account self.tenant_id = tenant_id # Done like this to bypass mypy, we don't have stubs for azure.identity self.credential = importlib.import_module("azure.identity").__dict__[ @@ -197,7 +190,6 @@ def __init__( print( ( "CredentialProviderAzure " - f"{self.account_name = } " f"{self.tenant_id = } " f"{self.scopes = } " ), @@ -206,28 +198,6 @@ def __init__( def __call__(self) -> CredentialProviderFunctionReturn: """Fetch the credentials.""" - if self.account_name is not None: - try: - creds = { - "account_key": self._get_azure_storage_account_key_az_cli( - self.account_name - ) - } - - if self._verbose: - print( - "[CredentialProviderAzure]: Retrieved account key from Azure CLI", - file=sys.stderr, - ) - except Exception as e: - if self._verbose: - print( - f"[CredentialProviderAzure]: Could not retrieve account key from Azure CLI: {e}", - file=sys.stderr, - ) - else: - return creds, None # type: ignore[return-value] - token = self.credential.get_token(*self.scopes, tenant_id=self.tenant_id) return { @@ -240,51 +210,6 @@ def _check_module_availability(cls) -> None: msg = "azure-identity must be installed to use `CredentialProviderAzure`" raise ImportError(msg) - @staticmethod - def _extract_adls_uri_storage_account(uri: str) -> str | None: - # "abfss://{CONTAINER}@{STORAGE_ACCOUNT}.dfs.core.windows.net/" - # ^^^^^^^^^^^^^^^^^ - try: - return ( - uri.split("://", 1)[1] - .split("/", 1)[0] - .split("@", 1)[1] - .split(".dfs.core.windows.net", 1)[0] - ) - - except IndexError: - return None - - @classmethod - def _get_azure_storage_account_key_az_cli(cls, account_name: str) -> str: - # [ - # { - # "creationTime": "1970-01-01T00:00:00.000000+00:00", - # "keyName": "key1", - # "permissions": "FULL", - # "value": "..." - # }, - # { - # "creationTime": "1970-01-01T00:00:00.000000+00:00", - # "keyName": "key2", - # "permissions": "FULL", - # "value": "..." - # } - # ] - - return json.loads( - cls._azcli( - "storage", - "account", - "keys", - "list", - "--output", - "json", - "--account-name", - account_name, - ) - )[0]["value"] - @classmethod def _azcli_version(cls) -> str | None: try: @@ -423,7 +348,6 @@ def _maybe_init_credential_provider( # For Azure we dispatch to `azure.identity` as much as possible if _is_azure_cloud(scheme): tenant_id = None - storage_account = None if storage_options is not None: for k, v in storage_options.items(): @@ -437,23 +361,19 @@ def _maybe_init_credential_provider( "authority_id", }: tenant_id = v - elif k in {"azure_storage_account_name", "account_name"}: - storage_account = v - elif k in {"azure_use_azure_cli", "use_azure_cli"}: + elif k in { + "azure_storage_account_name", + "account_name", + "azure_use_azure_cli", + "use_azure_cli", + }: continue else: # We assume some sort of access key was given, so we # just dispatch to the rust side. return None - storage_account = ( - # Prefer the one embedded in the path - CredentialProviderAzure._extract_adls_uri_storage_account(str(path)) - or storage_account - ) - provider = CredentialProviderAzure( - storage_account=storage_account, tenant_id=tenant_id, _verbose=verbose, ) From 940fc63440f53af4e0729ddac5eae10e25bd5b4b Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Mon, 13 Jan 2025 15:04:59 +1100 Subject: [PATCH 2/3] c --- .../polars/io/cloud/credential_provider.py | 105 ++++++++++++++++-- 1 file changed, 98 insertions(+), 7 deletions(-) diff --git a/py-polars/polars/io/cloud/credential_provider.py b/py-polars/polars/io/cloud/credential_provider.py index 174206660fa5..568e2c91f235 100644 --- a/py-polars/polars/io/cloud/credential_provider.py +++ b/py-polars/polars/io/cloud/credential_provider.py @@ -157,6 +157,7 @@ def __init__( *, scopes: list[str] | None = None, tenant_id: str | None = None, + _storage_account: str | None = None, _verbose: bool = False, ) -> None: """ @@ -176,6 +177,7 @@ def __init__( self._check_module_availability() + self.account_name = _storage_account self.tenant_id = tenant_id # Done like this to bypass mypy, we don't have stubs for azure.identity self.credential = importlib.import_module("azure.identity").__dict__[ @@ -189,7 +191,8 @@ def __init__( if self._verbose: print( ( - "CredentialProviderAzure " + "[CredentialProviderAzure]: " + f"{self.account_name = } " f"{self.tenant_id = } " f"{self.scopes = } " ), @@ -198,6 +201,44 @@ def __init__( def __call__(self) -> CredentialProviderFunctionReturn: """Fetch the credentials.""" + + POLARS_AUTO_USE_AZURE_STORAGE_ACCOUNT_KEY = os.getenv( + "POLARS_AUTO_USE_AZURE_STORAGE_ACCOUNT_KEY" + ) + + if self._verbose: + print( + "[CredentialProviderAzure]: " + f"{self.account_name = } " + f"{POLARS_AUTO_USE_AZURE_STORAGE_ACCOUNT_KEY = }", + file=sys.stderr, + ) + + if ( + self.account_name is not None + and POLARS_AUTO_USE_AZURE_STORAGE_ACCOUNT_KEY == "1" + ): + try: + creds = { + "account_key": self._get_azure_storage_account_key_az_cli( + self.account_name + ) + } + + if self._verbose: + print( + "[CredentialProviderAzure]: Retrieved account key from Azure CLI", + file=sys.stderr, + ) + except Exception as e: + if self._verbose: + print( + f"[CredentialProviderAzure]: Could not retrieve account key from Azure CLI: {e}", + file=sys.stderr, + ) + else: + return creds, None # type: ignore[return-value] + token = self.credential.get_token(*self.scopes, tenant_id=self.tenant_id) return { @@ -210,6 +251,51 @@ def _check_module_availability(cls) -> None: msg = "azure-identity must be installed to use `CredentialProviderAzure`" raise ImportError(msg) + @staticmethod + def _extract_adls_uri_storage_account(uri: str) -> str | None: + # "abfss://{CONTAINER}@{STORAGE_ACCOUNT}.dfs.core.windows.net/" + # ^^^^^^^^^^^^^^^^^ + try: + return ( + uri.split("://", 1)[1] + .split("/", 1)[0] + .split("@", 1)[1] + .split(".dfs.core.windows.net", 1)[0] + ) + + except IndexError: + return None + + @classmethod + def _get_azure_storage_account_key_az_cli(cls, account_name: str) -> str: + # [ + # { + # "creationTime": "1970-01-01T00:00:00.000000+00:00", + # "keyName": "key1", + # "permissions": "FULL", + # "value": "..." + # }, + # { + # "creationTime": "1970-01-01T00:00:00.000000+00:00", + # "keyName": "key2", + # "permissions": "FULL", + # "value": "..." + # } + # ] + + return json.loads( + cls._azcli( + "storage", + "account", + "keys", + "list", + "--output", + "json", + "--account-name", + account_name, + ) + )[0]["value"] + @classmethod def _azcli_version(cls) -> str | None: try: @@ -348,6 +434,7 @@ def _maybe_init_credential_provider( # For Azure we dispatch to `azure.identity` as much as possible if _is_azure_cloud(scheme): tenant_id = None + storage_account = None if storage_options is not None: for k, v in storage_options.items(): @@ -361,21 +448,25 @@ def _maybe_init_credential_provider( "authority_id", }: tenant_id = v - elif k in { - "azure_storage_account_name", - "account_name", - "azure_use_azure_cli", - "use_azure_cli", - }: + elif k in {"azure_storage_account_name", "account_name"}: + storage_account = v + elif k in {"azure_use_azure_cli", "use_azure_cli"}: continue else: # We assume some sort of access key was given, so we # just dispatch to the rust side. return None + storage_account = ( + # Prefer the one embedded in the path + CredentialProviderAzure._extract_adls_uri_storage_account(str(path)) + or storage_account + ) + provider = CredentialProviderAzure( tenant_id=tenant_id, _verbose=verbose, + _storage_account=storage_account, ) elif storage_options is not None: return None From 46f6001c3d3ccc65786e291981474b915d2253a2 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Mon, 13 Jan 2025 16:57:09 +1100 Subject: [PATCH 3/3] lint --- py-polars/polars/io/cloud/credential_provider.py | 1 - 1 file changed, 1 deletion(-) diff --git a/py-polars/polars/io/cloud/credential_provider.py b/py-polars/polars/io/cloud/credential_provider.py index 568e2c91f235..a73347df282a 100644 --- a/py-polars/polars/io/cloud/credential_provider.py +++ b/py-polars/polars/io/cloud/credential_provider.py @@ -201,7 +201,6 @@ def __init__( def __call__(self) -> CredentialProviderFunctionReturn: """Fetch the credentials.""" - POLARS_AUTO_USE_AZURE_STORAGE_ACCOUNT_KEY = os.getenv( "POLARS_AUTO_USE_AZURE_STORAGE_ACCOUNT_KEY" )