Skip to content

Commit

Permalink
Do Not Warn for Redirects to Given Patterns (#85)
Browse files Browse the repository at this point in the history
* feat: new `--do-not-warn-for-redirect-to` cli option

Mutes warnings caused by redirects that lead to a URL matching one of the given patterns.

* doc(Readme): add the `do-not-warn-for-redirect-to` option to the example `.mlc.toml`
  • Loading branch information
EagleoutIce authored May 19, 2024
1 parent b6e56df commit d73269b
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 14 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ The following arguments are available:
| `<directory>` | | Only positional argument. Path to directory which shall be checked with all sub-dirs. Can also be a specific filename which shall be checked. |
| `--help` | `-h` | Print help |
| `--debug` | `-d` | Show verbose debug information |
| `--do-not-warn-for-redirect-to` | | Do not warn for links which redirect to a URL matching one of the given patterns. Accepts a comma-separated list and allows the same link format as `--ignore-links`. For example, `--do-not-warn-for-redirect-to "http*://crates.io*"` will not warn for links which redirect to the `crates.io` website. |
| `--offline` | `-o` | Do not check any web links. Renamed from `--no-web-links`, which is still available as an alias for backward compatibility |
| `--match-file-extension` | `-e` | Set this flag if the file extension shall be checked as well. For example, the markup link `[link](dir/file)` matches if a file called `file.md` exists in `dir`, but fails when the `--match-file-extension` flag is set. |
| `--version` | `-V` | Print current version of mlc |
Expand All @@ -123,6 +124,8 @@ All optional arguments which can be passed via the command line can also be conf
``` toml
# Print debug information to console
debug = true
# Do not warn for links which redirect to a URL matching one of these patterns
do-not-warn-for-redirect-to=["http*://crates.io*"]
# Do not check web links
offline = true
# Check the exact file extension when searching for a file
Expand Down
11 changes: 11 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ pub fn parse_args() -> Config {
.arg(arg!(-o --offline "Do not check web links")
.alias("no-web-links")
.required(false))
.arg(Arg::new("do-not-warn-for-redirect-to")
.long("do-not-warn-for-redirect-to")
.value_name("LINKS")
.value_delimiter(',')
.action(ArgAction::Append)
.help("Comma separated list of links which will be ignored")
.required(false))
.arg(Arg::new("match-file-extension")
.long("match-file-extension")
.short('e')
Expand Down Expand Up @@ -87,6 +94,10 @@ pub fn parse_args() -> Config {
if matches.get_flag("debug") {
opt.debug = Some(true);
}

if let Some(do_not_warn_for_redirect_to) = matches.get_many::<String>("do-not-warn-for-redirect-to") {
opt.do_not_warn_for_redirect_to = Some(do_not_warn_for_redirect_to.map(|x| x.to_string()).collect());
}

if let Some(throttle_str) = matches.get_one::<String>("throttle") {
let throttle = throttle_str.parse::<u32>().unwrap();
Expand Down
13 changes: 12 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ const PARALLEL_REQUESTS: usize = 20;
#[derive(Default, Debug, Deserialize)]
pub struct OptionalConfig {
pub debug: Option<bool>,
#[serde(rename(deserialize = "do-not-warn-for-redirect-to"))]
pub do_not_warn_for_redirect_to: Option<Vec<String>>,
#[serde(rename(deserialize = "markup-types"))]
pub markup_types: Option<Vec<markup::MarkupType>>,
pub offline: Option<bool>,
Expand Down Expand Up @@ -79,6 +81,7 @@ impl fmt::Display for Config {
"
Debug: {:?}
Dir: {}
DoNotWarnForRedirectTo: {:?}
Types: {:?}
Offline: {}
MatchExt: {}
Expand All @@ -88,6 +91,7 @@ IgnorePath: {:?}
Throttle: {} ms",
self.optional.debug.unwrap_or(false),
self.directory.to_str().unwrap_or_default(),
self.optional.do_not_warn_for_redirect_to,
markup_types_str,
self.optional.offline.unwrap_or_default(),
self.optional.match_file_extension.unwrap_or_default(),
Expand Down Expand Up @@ -186,13 +190,20 @@ pub async fn run(config: &Config) -> Result<(), ()> {
}
}


let do_not_warn_for_redirect_to: Arc<Vec<WildMatch>> = Arc::new(match &config.optional.do_not_warn_for_redirect_to {
Some(s) => s.iter().map(|m| WildMatch::new(m)).collect(),
None => vec![],
});

let throttle = config.optional.throttle.unwrap_or_default() > 0;
info!("Throttle HTTP requests to same host: {:?}", throttle);
let waits = Arc::new(Mutex::new(HashMap::new()));
// See also http://patshaughnessy.net/2020/1/20/downloading-100000-files-using-async-rust
let mut buffered_stream = stream::iter(link_target_groups.keys())
.map(|target| {
let waits = waits.clone();
let do_not_warn_for_redirect_to = Arc::clone(&do_not_warn_for_redirect_to);
async move {
if throttle && target.link_type == LinkType::Http {
let parsed = match Url::parse(&target.target) {
Expand Down Expand Up @@ -244,7 +255,7 @@ pub async fn run(config: &Config) -> Result<(), ()> {
}

let result_code =
link_validator::check(&target.target, &target.link_type, config).await;
link_validator::check(&target.target, &target.link_type, config, &do_not_warn_for_redirect_to).await;

FinalResult {
target: target.clone(),
Expand Down
42 changes: 31 additions & 11 deletions src/link_validator/http.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ use reqwest::Client;
use reqwest::Method;
use reqwest::Request;
use reqwest::StatusCode;
use wildmatch::WildMatch;

pub async fn check_http(target: &str) -> LinkCheckResult {
pub async fn check_http(target: &str, do_not_warn_for_redirect_to: &Vec<WildMatch>) -> LinkCheckResult {
debug!("Check http link target {:?}", target);
let url = reqwest::Url::parse(target).expect("URL of unknown type");

match http_request(&url).await {
match http_request(&url, do_not_warn_for_redirect_to).await {
Ok(response) => response,
Err(error_msg) => LinkCheckResult::Failed(format!("Http(s) request failed. {}", error_msg)),
}
Expand All @@ -25,7 +26,7 @@ fn new_request(method: Method, url: &reqwest::Url) -> Request {
req
}

async fn http_request(url: &reqwest::Url) -> reqwest::Result<LinkCheckResult> {
async fn http_request(url: &reqwest::Url, do_not_warn_for_redirect_to: &Vec<WildMatch>) -> reqwest::Result<LinkCheckResult> {
lazy_static! {
static ref CLIENT: Client = reqwest::Client::builder()
.brotli(true)
Expand Down Expand Up @@ -55,7 +56,7 @@ async fn http_request(url: &reqwest::Url) -> reqwest::Result<LinkCheckResult> {

let status = response.status();
if status.is_success() {
if response.url() == url {
if response.url() == url || do_not_warn_for_redirect_to.iter().any(|x| x.matches(response.url().as_ref())) {
Ok(LinkCheckResult::Ok)
} else {
Ok(LinkCheckResult::Warning("Request was redirected to ".to_string() + response.url().as_ref()))
Expand Down Expand Up @@ -86,43 +87,62 @@ mod test {

#[tokio::test]
async fn check_http_is_available() {
let result = check_http("https://gitlab.com/becheran/mlc").await;
let result = check_http("https://gitlab.com/becheran/mlc", &vec![]).await;
assert_eq!(result, LinkCheckResult::Ok);
}

#[tokio::test]
async fn check_http_is_redirection() {
let result = check_http("http://gitlab.com/becheran/mlc").await;
let result = check_http("http://gitlab.com/becheran/mlc", &vec![]).await;
assert_eq!(result, LinkCheckResult::Warning("Request was redirected to https://gitlab.com/becheran/mlc".to_string()));
}

#[tokio::test]
async fn check_http_redirection_do_not_warn_if_ignored() {
    // The 'https' variant that the request gets redirected to is listed as an
    // accepted redirect target, so the check must report `Ok` instead of a warning.
    let accepted_targets = vec![WildMatch::new("https://gitlab.com/becheran/mlc")];
    let outcome = check_http("http://gitlab.com/becheran/mlc", &accepted_targets).await;
    assert_eq!(outcome, LinkCheckResult::Ok);
}

#[tokio::test]
async fn check_http_redirection_do_not_warn_if_ignored_star_pattern() {
    // A lone "*" pattern is expected to accept the redirect target, so the
    // redirected request is reported as `Ok`.
    let accepted_targets = vec![WildMatch::new("*")];
    let outcome = check_http("http://gitlab.com/becheran/mlc", &accepted_targets).await;
    assert_eq!(outcome, LinkCheckResult::Ok);
}

#[tokio::test]
async fn check_http_redirection_do_warn_if_ignored_mismatch() {
    // The only accepted pattern points at a different site, so the redirect
    // to the 'https' variant must still produce the usual warning.
    let accepted_targets = vec![WildMatch::new("http://www.google.com")];
    let outcome = check_http("http://gitlab.com/becheran/mlc", &accepted_targets).await;
    let expected_warning = LinkCheckResult::Warning(
        "Request was redirected to https://gitlab.com/becheran/mlc".to_string(),
    );
    assert_eq!(outcome, expected_warning);
}

#[tokio::test]
async fn check_http_is_redirection_failure() {
let result = check_http("http://github.com/fake-page").await;
let result = check_http("http://github.com/fake-page", &vec![]).await;
assert_eq!(result, LinkCheckResult::Failed("404 - Not Found".to_string()));
}

#[tokio::test]
async fn check_https_crates_io_available() {
let result = check_http("https://crates.io").await;
let result = check_http("https://crates.io", &vec![]).await;
assert_eq!(result, LinkCheckResult::Ok);
}

#[tokio::test]
async fn check_http_request_with_hash() {
let result = check_http("https://gitlab.com/becheran/mlc#bla").await;
let result = check_http("https://gitlab.com/becheran/mlc#bla", &vec![]).await;
assert_eq!(result, LinkCheckResult::Ok);
}

#[tokio::test]
async fn check_http_request_redirection_with_hash() {
let result = check_http("http://gitlab.com/becheran/mlc#bla").await;
let result = check_http("http://gitlab.com/becheran/mlc#bla", &vec![]).await;
assert_eq!(result, LinkCheckResult::Warning("Request was redirected to https://gitlab.com/becheran/mlc".to_string()));
}

#[tokio::test]
async fn check_wrong_http_request() {
let result = check_http("https://doesNotExist.me/even/less/likelly").await;
let result = check_http("https://doesNotExist.me/even/less/likelly", &vec![]).await;
assert!(result != LinkCheckResult::Ok);
}
}
5 changes: 3 additions & 2 deletions src/link_validator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use mail::check_mail;

pub use link_type::get_link_type;
pub use link_type::LinkType;
use wildmatch::WildMatch;

#[derive(Debug, Eq, PartialEq, Clone)]
pub enum LinkCheckResult {
Expand All @@ -34,7 +35,7 @@ pub async fn resolve_target_link(
}
}

pub async fn check(link_target: &str, link_type: &LinkType, config: &Config) -> LinkCheckResult {
pub async fn check(link_target: &str, link_type: &LinkType, config: &Config, do_not_warn_for_redirect_to: &Vec<WildMatch>) -> LinkCheckResult {
info!("Check link {}.", &link_target);
match link_type {
LinkType::Ftp => LinkCheckResult::NotImplemented(format!(
Expand All @@ -51,7 +52,7 @@ pub async fn check(link_target: &str, link_type: &LinkType, config: &Config) ->
"Ignore web link because of the offline flag.".to_string(),
)
} else {
check_http(link_target).await
check_http(link_target, do_not_warn_for_redirect_to).await
}
}
LinkType::FileSystem => check_filesystem(link_target, config).await,
Expand Down
2 changes: 2 additions & 0 deletions tests/end_to_end.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ async fn end_to_end() {
directory: benches_dir().join("benchmark"),
optional: OptionalConfig {
debug: None,
do_not_warn_for_redirect_to: None,
markup_types: Some(vec![MarkupType::Markdown]),
offline: None,
match_file_extension: None,
Expand All @@ -37,6 +38,7 @@ async fn end_to_end_different_root() {
directory: test_files.clone(),
optional: OptionalConfig {
debug: Some(true),
do_not_warn_for_redirect_to: None,
markup_types: Some(vec![MarkupType::Markdown]),
offline: None,
match_file_extension: None,
Expand Down

0 comments on commit d73269b

Please sign in to comment.