From d73269b723e67cb1436aef6790a3982ef8b98af4 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sun, 19 May 2024 21:26:47 +0200 Subject: [PATCH] Do Not Warn for Redirects to Given Patterns (#85) * feat: new `--do-not-warn-for-redirect-to` cli option Mutes warnings caused by redirections which lead to an url matching the given patterns. * doc(Readme): add the `do-not-warn-for-redirect-to` option to the example `.mlc.toml` --- README.md | 3 +++ src/cli.rs | 11 ++++++++++ src/lib.rs | 13 +++++++++++- src/link_validator/http.rs | 42 ++++++++++++++++++++++++++++---------- src/link_validator/mod.rs | 5 +++-- tests/end_to_end.rs | 2 ++ 6 files changed, 62 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 1fbb220..a47691a 100644 --- a/README.md +++ b/README.md @@ -109,6 +109,7 @@ The following arguments are available: | `<directory>` | | Only positional argument. Path to directory which shall be checked with all sub-dirs. Can also be a specific filename which shall be checked. | | `--help` | `-h` | Print help | | `--debug` | `-d` | Show verbose debug information | +| `--do-not-warn-for-redirect-to` | | Do not warn for links which redirect to the given URL. Allows the same link format as `--ignore-links`. For example, `--do-not-warn-for-redirect-to "http*://crates.io*"` will not warn for links which redirect to the `crates.io` website. | | `--offline` | `-o` | Do not check any web links. Renamed from `--no-web-links` which is still an alias for downwards compatibility | | `--match-file-extension` | `-e` | Set the flag, if the file extension shall be checked as well. For example the following markup link `[link](dir/file)` matches if for example a file called `file.md` exists in `dir`, but would fail when the `--match-file-extension` flag is set. 
| | `--version` | `-V` | Print current version of mlc | @@ -123,6 +124,8 @@ All optional arguments which can be passed via the command line can also be conf ``` toml # Print debug information to console debug = true +# Do not warn for links which redirect to the given URL +do-not-warn-for-redirect-to=["http*://crates.io*"] # Do not check web links offline = true # Check the exact file extension when searching for a file diff --git a/src/cli.rs b/src/cli.rs index fd401a7..a13b8ae 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -29,6 +29,13 @@ pub fn parse_args() -> Config { .arg(arg!(-o --offline "Do not check web links") .alias("no-web-links") .required(false)) + .arg(Arg::new("do-not-warn-for-redirect-to") + .long("do-not-warn-for-redirect-to") + .value_name("LINKS") + .value_delimiter(',') + .action(ArgAction::Append) + .help("Comma separated list of links which will be ignored") + .required(false)) .arg(Arg::new("match-file-extension") .long("match-file-extension") .short('e') @@ -87,6 +94,10 @@ pub fn parse_args() -> Config { if matches.get_flag("debug") { opt.debug = Some(true); } + + if let Some(do_not_warn_for_redirect_to) = matches.get_many::<String>("do-not-warn-for-redirect-to") { + opt.do_not_warn_for_redirect_to = Some(do_not_warn_for_redirect_to.map(|x| x.to_string()).collect()); + } if let Some(throttle_str) = matches.get_one::<String>("throttle") { let throttle = throttle_str.parse::<u32>().unwrap(); diff --git a/src/lib.rs b/src/lib.rs index faa31f1..79fb24b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,6 +36,8 @@ const PARALLEL_REQUESTS: usize = 20; #[derive(Default, Debug, Deserialize)] pub struct OptionalConfig { pub debug: Option<bool>, + #[serde(rename(deserialize = "do-not-warn-for-redirect-to"))] + pub do_not_warn_for_redirect_to: Option<Vec<String>>, #[serde(rename(deserialize = "markup-types"))] pub markup_types: Option<Vec<MarkupType>>, pub offline: Option<bool>, @@ -79,6 +81,7 @@ impl fmt::Display for Config { " Debug: {:?} Dir: {} +DoNotWarnForRedirectTo: {:?} Types: {:?} Offline: {} MatchExt: {} @@ 
-88,6 +91,7 @@ IgnorePath: {:?} Throttle: {} ms", self.optional.debug.unwrap_or(false), self.directory.to_str().unwrap_or_default(), + self.optional.do_not_warn_for_redirect_to, markup_types_str, self.optional.offline.unwrap_or_default(), self.optional.match_file_extension.unwrap_or_default(), @@ -186,6 +190,12 @@ pub async fn run(config: &Config) -> Result<(), ()> { } } + + let do_not_warn_for_redirect_to: Arc<Vec<WildMatch>> = Arc::new(match &config.optional.do_not_warn_for_redirect_to { + Some(s) => s.iter().map(|m| WildMatch::new(m)).collect(), + None => vec![], + }); + let throttle = config.optional.throttle.unwrap_or_default() > 0; info!("Throttle HTTP requests to same host: {:?}", throttle); let waits = Arc::new(Mutex::new(HashMap::new())); @@ -193,6 +203,7 @@ pub async fn run(config: &Config) -> Result<(), ()> { let mut buffered_stream = stream::iter(link_target_groups.keys()) .map(|target| { let waits = waits.clone(); + let do_not_warn_for_redirect_to = Arc::clone(&do_not_warn_for_redirect_to); async move { if throttle && target.link_type == LinkType::Http { let parsed = match Url::parse(&target.target) { @@ -244,7 +255,7 @@ pub async fn run(config: &Config) -> Result<(), ()> { } let result_code = - link_validator::check(&target.target, &target.link_type, config).await; + link_validator::check(&target.target, &target.link_type, config, &do_not_warn_for_redirect_to).await; FinalResult { target: target.clone(), diff --git a/src/link_validator/http.rs b/src/link_validator/http.rs index eb3d8f5..7199441 100644 --- a/src/link_validator/http.rs +++ b/src/link_validator/http.rs @@ -6,12 +6,13 @@ use reqwest::Client; use reqwest::Method; use reqwest::Request; use reqwest::StatusCode; +use wildmatch::WildMatch; -pub async fn check_http(target: &str) -> LinkCheckResult { +pub async fn check_http(target: &str, do_not_warn_for_redirect_to: &Vec<WildMatch>) -> LinkCheckResult { debug!("Check http link target {:?}", target); let url = reqwest::Url::parse(target).expect("URL of unknown type"); - 
match http_request(&url).await { + match http_request(&url, do_not_warn_for_redirect_to).await { Ok(response) => response, Err(error_msg) => LinkCheckResult::Failed(format!("Http(s) request failed. {}", error_msg)), } @@ -25,7 +26,7 @@ fn new_request(method: Method, url: &reqwest::Url) -> Request { req } -async fn http_request(url: &reqwest::Url) -> reqwest::Result<LinkCheckResult> { +async fn http_request(url: &reqwest::Url, do_not_warn_for_redirect_to: &Vec<WildMatch>) -> reqwest::Result<LinkCheckResult> { lazy_static! { static ref CLIENT: Client = reqwest::Client::builder() .brotli(true) @@ -55,7 +56,7 @@ async fn http_request(url: &reqwest::Url) -> reqwest::Result<LinkCheckResult> { let status = response.status(); if status.is_success() { - if response.url() == url { + if response.url() == url || do_not_warn_for_redirect_to.iter().any(|x| x.matches(response.url().as_ref())) { Ok(LinkCheckResult::Ok) } else { Ok(LinkCheckResult::Warning("Request was redirected to ".to_string() + response.url().as_ref())) @@ -86,43 +87,62 @@ mod test { #[tokio::test] async fn check_http_is_available() { - let result = check_http("https://gitlab.com/becheran/mlc").await; + let result = check_http("https://gitlab.com/becheran/mlc", &vec![]).await; assert_eq!(result, LinkCheckResult::Ok); } #[tokio::test] async fn check_http_is_redirection() { - let result = check_http("http://gitlab.com/becheran/mlc").await; + let result = check_http("http://gitlab.com/becheran/mlc", &vec![]).await; assert_eq!(result, LinkCheckResult::Warning("Request was redirected to https://gitlab.com/becheran/mlc".to_string())); } + + #[tokio::test] + async fn check_http_redirection_do_not_warn_if_ignored() { + // we ignore redirections to the 'https'-version + let result = check_http("http://gitlab.com/becheran/mlc", &vec![WildMatch::new("https://gitlab.com/becheran/mlc")]).await; + assert_eq!(result, LinkCheckResult::Ok); + } + + #[tokio::test] + async fn check_http_redirection_do_not_warn_if_ignored_star_pattern() { + let result = 
check_http("http://gitlab.com/becheran/mlc", &vec![WildMatch::new("*")]).await; + assert_eq!(result, LinkCheckResult::Ok); + } + #[tokio::test] + async fn check_http_redirection_do_warn_if_ignored_mismatch() { + let result = check_http("http://gitlab.com/becheran/mlc", &vec![WildMatch::new("http://www.google.com")]).await; + assert_eq!(result, LinkCheckResult::Warning("Request was redirected to https://gitlab.com/becheran/mlc".to_string())); + } + #[tokio::test] async fn check_http_is_redirection_failure() { - let result = check_http("http://github.com/fake-page").await; + let result = check_http("http://github.com/fake-page", &vec![]).await; assert_eq!(result, LinkCheckResult::Failed("404 - Not Found".to_string())); } #[tokio::test] async fn check_https_crates_io_available() { - let result = check_http("https://crates.io").await; + let result = check_http("https://crates.io", &vec![]).await; assert_eq!(result, LinkCheckResult::Ok); } #[tokio::test] async fn check_http_request_with_hash() { - let result = check_http("https://gitlab.com/becheran/mlc#bla").await; + let result = check_http("https://gitlab.com/becheran/mlc#bla", &vec![]).await; assert_eq!(result, LinkCheckResult::Ok); } #[tokio::test] async fn check_http_request_redirection_with_hash() { - let result = check_http("http://gitlab.com/becheran/mlc#bla").await; + let result = check_http("http://gitlab.com/becheran/mlc#bla", &vec![]).await; assert_eq!(result, LinkCheckResult::Warning("Request was redirected to https://gitlab.com/becheran/mlc".to_string())); } #[tokio::test] async fn check_wrong_http_request() { - let result = check_http("https://doesNotExist.me/even/less/likelly").await; + let result = check_http("https://doesNotExist.me/even/less/likelly", &vec![]).await; assert!(result != LinkCheckResult::Ok); } } diff --git a/src/link_validator/mod.rs b/src/link_validator/mod.rs index b453103..a9091cc 100644 --- a/src/link_validator/mod.rs +++ b/src/link_validator/mod.rs @@ -12,6 +12,7 @@ use 
mail::check_mail; pub use link_type::get_link_type; pub use link_type::LinkType; +use wildmatch::WildMatch; #[derive(Debug, Eq, PartialEq, Clone)] pub enum LinkCheckResult { @@ -34,7 +35,7 @@ pub async fn resolve_target_link( } } -pub async fn check(link_target: &str, link_type: &LinkType, config: &Config) -> LinkCheckResult { +pub async fn check(link_target: &str, link_type: &LinkType, config: &Config, do_not_warn_for_redirect_to: &Vec<WildMatch>) -> LinkCheckResult { info!("Check link {}.", &link_target); match link_type { LinkType::Ftp => LinkCheckResult::NotImplemented(format!( @@ -51,7 +52,7 @@ pub async fn check(link_target: &str, link_type: &LinkType, config: &Config) -> "Ignore web link because of the offline flag.".to_string(), ) } else { - check_http(link_target).await + check_http(link_target, do_not_warn_for_redirect_to).await } } LinkType::FileSystem => check_filesystem(link_target, config).await, diff --git a/tests/end_to_end.rs b/tests/end_to_end.rs index 0bce94a..0108f5b 100644 --- a/tests/end_to_end.rs +++ b/tests/end_to_end.rs @@ -13,6 +13,7 @@ async fn end_to_end() { directory: benches_dir().join("benchmark"), optional: OptionalConfig { debug: None, + do_not_warn_for_redirect_to: None, markup_types: Some(vec![MarkupType::Markdown]), offline: None, match_file_extension: None, @@ -37,6 +38,7 @@ async fn end_to_end_different_root() { directory: test_files.clone(), optional: OptionalConfig { debug: Some(true), + do_not_warn_for_redirect_to: None, markup_types: Some(vec![MarkupType::Markdown]), offline: None, match_file_extension: None,