From e50d1a9753d49ee6a52b0f280bad29db55b80422 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Fri, 26 Jul 2024 14:54:02 +1000 Subject: [PATCH 1/5] feat: Support authentication with HuggingFace login --- crates/polars-io/src/cloud/options.rs | 50 +++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/crates/polars-io/src/cloud/options.rs b/crates/polars-io/src/cloud/options.rs index c9958f4b745e..a75b90fb15ac 100644 --- a/crates/polars-io/src/cloud/options.rs +++ b/crates/polars-io/src/cloud/options.rs @@ -22,6 +22,7 @@ use object_store::ClientOptions; use object_store::{BackoffConfig, RetryConfig}; #[cfg(feature = "aws")] use once_cell::sync::Lazy; +use polars_core::config; use polars_error::*; #[cfg(feature = "aws")] use polars_utils::cache::FastFixedCache; @@ -475,25 +476,62 @@ impl CloudOptions { #[cfg(feature = "http")] { let mut this = Self::default(); + let mut token = None; + let verbose = config::verbose(); if let Ok(v) = std::env::var("HF_TOKEN") { - this.config = Some(CloudConfig::Http { - headers: vec![("Authorization".into(), format!("Bearer {}", v))], - }) + if verbose { + eprintln!("HF token sourced from HF_TOKEN env var"); + } + token = Some(v); } for (i, (k, v)) in config.into_iter().enumerate() { let (k, v) = (k.as_ref(), v.into()); if i == 0 && k == "token" { - this.config = Some(CloudConfig::Http { - headers: vec![("Authorization".into(), format!("Bearer {}", v))], - }) + if verbose { + eprintln!("HF token sourced from storage_options"); + } + token = Some(v); } else { polars_bail!(ComputeError: "unknown configuration key: {}", k) } } + if token.is_none() { + token = (|| { + let hf_home = std::env::var("HF_TOKEN"); + let hf_home = hf_home.as_deref(); + let hf_home = hf_home.unwrap_or("~/.cache/huggingface"); + let hf_home = resolve_homedir(std::path::Path::new(&hf_home)); + let cached_token_path = hf_home.join("token"); + + let v = std::string::String::from_utf8( + std::fs::read(&cached_token_path).ok()?, + ) + .ok()?; + + if v.is_empty() { + None + } else { + if verbose { + eprintln!( + "HF token sourced from {}", + cached_token_path.to_str().unwrap() + ); + } + Some(v) + } + })(); + } + + if let Some(v) = token { + this.config = Some(CloudConfig::Http { + headers: vec![("Authorization".into(), format!("Bearer {}", v))], + }) + } + Ok(this) } #[cfg(not(feature = "http"))] From 38ad58eeca8116db500d343ced84c5e891a4c247 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Fri, 26 Jul 2024 14:54:16 +1000 Subject: [PATCH 2/5] c --- crates/polars-io/src/cloud/options.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/polars-io/src/cloud/options.rs b/crates/polars-io/src/cloud/options.rs index a75b90fb15ac..8e080caf3cfe 100644 --- a/crates/polars-io/src/cloud/options.rs +++ b/crates/polars-io/src/cloud/options.rs @@ -22,7 +22,6 @@ use object_store::ClientOptions; use object_store::{BackoffConfig, RetryConfig}; #[cfg(feature = "aws")] use once_cell::sync::Lazy; -use polars_core::config; use polars_error::*; #[cfg(feature = "aws")] use polars_utils::cache::FastFixedCache; @@ -475,6 +474,8 @@ impl CloudOptions { CloudType::Hf => { #[cfg(feature = "http")] { + use polars_core::config; + let mut this = Self::default(); let mut token = None; let verbose = config::verbose(); From dfe5c166665fa97e441f5c3e1b1814754f2530c6 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Fri, 26 Jul 2024 15:00:04 +1000 Subject: [PATCH 3/5] c --- crates/polars-io/src/cloud/options.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/polars-io/src/cloud/options.rs b/crates/polars-io/src/cloud/options.rs index 8e080caf3cfe..b400c5191e02 100644 --- a/crates/polars-io/src/cloud/options.rs +++ b/crates/polars-io/src/cloud/options.rs @@ -502,7 +502,7 @@ impl CloudOptions { if token.is_none() { token = (|| { - let hf_home = std::env::var("HF_TOKEN"); + let hf_home = std::env::var("HF_HOME"); let hf_home = hf_home.as_deref(); let hf_home = hf_home.unwrap_or("~/.cache/huggingface"); let hf_home = resolve_homedir(std::path::Path::new(&hf_home)); From c663bc04d55d194f2a0b154c05bde9e1d141d54b Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Fri, 26 Jul 2024 15:06:43 +1000 Subject: [PATCH 4/5] c --- crates/polars-io/src/cloud/options.rs | 45 +++++++++++++-------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/crates/polars-io/src/cloud/options.rs b/crates/polars-io/src/cloud/options.rs index b400c5191e02..1ec0879e512f 100644 --- a/crates/polars-io/src/cloud/options.rs +++ b/crates/polars-io/src/cloud/options.rs @@ -480,13 +480,6 @@ impl CloudOptions { let mut token = None; let verbose = config::verbose(); - if let Ok(v) = std::env::var("HF_TOKEN") { - if verbose { - eprintln!("HF token sourced from HF_TOKEN env var"); - } - token = Some(v); - } - for (i, (k, v)) in config.into_iter().enumerate() { let (k, v) = (k.as_ref(), v.into()); @@ -496,12 +489,19 @@ impl CloudOptions { } token = Some(v); } else { - polars_bail!(ComputeError: "unknown configuration key: {}", k) + polars_bail!(ComputeError: "unknown configuration key for HF: {}", k) } } - if token.is_none() { - token = (|| { + token = token + .or_else(|| { + let v = std::env::var("HF_TOKEN").ok(); + if v.is_some() && verbose { + eprintln!("HF token sourced from HF_TOKEN env var"); + } + v + }) + .or_else(|| { let hf_home = std::env::var("HF_HOME"); let hf_home = hf_home.as_deref(); let hf_home = hf_home.unwrap_or("~/.cache/huggingface"); @@ -511,21 +511,18 @@ impl CloudOptions { let v = std::string::String::from_utf8( std::fs::read(&cached_token_path).ok()?, ) - .ok()?; - - if v.is_empty() { - None - } else { - if verbose { - eprintln!( - "HF token sourced from {}", - cached_token_path.to_str().unwrap() - ); - } - Some(v) + .ok() + .filter(|x| !x.is_empty()); + + if v.is_some() && verbose { + eprintln!( + "HF token sourced from {}", + cached_token_path.to_str().unwrap() + ); } - })(); - } + + v + }); if let Some(v) = token { this.config = Some(CloudConfig::Http { From 4c91ae1977b9e5380b78a203972abdb0d7344b25 Mon Sep 17 00:00:00 2001 From: Simon Lin Date: Fri, 26 Jul 2024 15:10:36 +1000 Subject: [PATCH 5/5] c --- crates/polars-io/src/cloud/options.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/polars-io/src/cloud/options.rs b/crates/polars-io/src/cloud/options.rs index 1ec0879e512f..f544765ddad9 100644 --- a/crates/polars-io/src/cloud/options.rs +++ b/crates/polars-io/src/cloud/options.rs @@ -39,8 +39,6 @@ use url::Url; #[cfg(feature = "file_cache")] use crate::file_cache::get_env_file_cache_ttl; #[cfg(feature = "aws")] -use crate::path_utils::resolve_homedir; -#[cfg(feature = "aws")] use crate::pl_async::with_concurrency_budget; #[cfg(feature = "aws")] @@ -211,6 +209,8 @@ fn read_config( builder: &mut AmazonS3Builder, items: &[(&Path, &[(&str, AmazonS3ConfigKey)])], ) -> Option<()> { + use crate::path_utils::resolve_homedir; + for (path, keys) in items { if keys .iter() @@ -476,6 +476,8 @@ impl CloudOptions { { use polars_core::config; + use crate::path_utils::resolve_homedir; + let mut this = Self::default(); let mut token = None; let verbose = config::verbose();