From 1afd1c717d4478f09ef004bc5864bf3119ffb2b7 Mon Sep 17 00:00:00 2001 From: willcl-ark Date: Tue, 4 Jun 2024 22:37:03 +0100 Subject: [PATCH] add --gitignore option Using this option will ignore all *.md and *.html files which are currently ignored by git. This is done via `git ls-files --ignored --others --exclude-standard`. --- README.md | 9 +++++ src/cli.rs | 29 ++++++++++++---- src/lib.rs | 84 ++++++++++++++++++++++++++++++++++++++++----- tests/end_to_end.rs | 2 ++ 4 files changed, 109 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 9c287d5..acf8695 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,12 @@ Another example is to call *mlc* on a certain directory or file: mlc ./docs ``` +Alternatively, you may want to ignore all files currently ignored by `git` (requires the `git` binary to be found on `$PATH`) and set a root-dir for relative links: + +```bash +mlc --gitignore --root-dir . +``` + Call *mlc* with the `--help` flag to display all available cli arguments: ``` bash @@ -120,6 +126,7 @@ The following arguments are available: | `--match-file-extension` | `-e` | Set the flag, if the file extension shall be checked as well. For example the following markup link `[link](dir/file)` matches if for example a file called `file.md` exists in `dir`, but would fail when the `--match-file-extension` flag is set. | | `--version` | `-V` | Print current version of mlc | | `--ignore-path` | `-p` | Comma separated list of directories or files which shall be ignored. For example | +| `--gitignore` | `-g` | Ignore all files currently ignored by git (requires the `git` binary to be available on `$PATH`). | | `--ignore-links` | `-i` | Comma separated list of links which shall be ignored. Use simple `?` and `*` wildcards. For example `--ignore-links "http*://crates.io*"` will skip all links to the crates.io website. See the [used lib](https://github.com/becheran/wildmatch) for more information. 
| | `--markup-types` | `-t` | Comma separated list list of markup types which shall be checked [possible values: md, html] | | `--root-dir` | `-r` | All links to the file system starting with a slash on linux or backslash on windows will use another virtual root dir. For example the link in a file `[link](/dir/other/file.md)` checked with the cli arg `--root-dir /env/another/dir` will let *mlc* check the existence of `/env/another/dir/dir/other/file.md`. | @@ -138,6 +145,8 @@ offline = true match-file-extension= true # List of files and directories which will be ignored ignore-path=["./ignore-me","./src"] +# Ignore all files ignored by git +gitignore = true # List of links which will be ignored ignore-links=["http://ignore-me.de/*","http://*.ignoresub-domain/*"] # List of markup types which shall be checked diff --git a/src/cli.rs b/src/cli.rs index f3155f7..3529dd8 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -96,6 +96,16 @@ pub fn parse_args() -> Config { .help("Path to the root folder used to resolve all relative paths") .required(false), ) + + .arg( + Arg::new("gitignore") + .long("gitignore") + .short('g') + .value_name("GIT") + .help("Ignore all files ignored by git") + .action(ArgAction::SetTrue) + .required(false), + ) .get_matches(); let default_dir = format!(".{}", &MAIN_SEPARATOR); @@ -148,18 +158,23 @@ pub fn parse_args() -> Config { } if let Some(ignore_path) = matches.get_many::("ignore-path") { - opt.ignore_path = Some(ignore_path.map(|x| Path::new(x).to_path_buf()).collect()); - } - if opt.ignore_path.is_some() { - opt.ignore_path.as_mut().unwrap().iter_mut().for_each(|p| { + let mut paths: Vec<_> = ignore_path.map(|x| Path::new(x).to_path_buf()).collect(); + for p in paths.iter_mut() { match fs::canonicalize(&p) { - Ok(p) => &p, + Ok(canonical_path) => { + *p = canonical_path; + } Err(e) => { println!("⚠ Warn: Ignore path {:?} not found. 
{:?}.", p, e); - &p + panic!("Exiting due to invalid ignore path."); } }; - }); + } + opt.ignore_path = Some(paths); + } + + if matches.get_flag("gitignore") { + opt.gitignore = Some(true); } if let Some(root_dir) = matches.get_one::("root-dir") { diff --git a/src/lib.rs b/src/lib.rs index 479faaf..20d9c52 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,10 @@ use serde::Deserialize; use std::collections::HashMap; use std::env; use std::fmt; +use std::fs; +use std::path::Path; use std::path::PathBuf; +use std::process::Command; use std::sync::Arc; use tokio::sync::Mutex; use tokio::time::{sleep_until, Duration, Instant}; @@ -49,6 +52,8 @@ pub struct OptionalConfig { pub ignore_path: Option>, #[serde(rename(deserialize = "root-dir"))] pub root_dir: Option, + #[serde(rename(deserialize = "gitignore"))] + pub gitignore: Option, pub throttle: Option, } @@ -80,13 +85,14 @@ impl fmt::Display for Config { f, " Debug: {:?} -Dir: {} +Dir: {} DoNotWarnForRedirectTo: {:?} -Types: {:?} +Types: {:?} Offline: {} MatchExt: {} RootDir: {} -IgnoreLinks: {} +Gitignore: {} +IgnoreLinks: {} IgnorePath: {:?} Throttle: {} ms", self.optional.debug.unwrap_or(false), @@ -96,6 +102,7 @@ Throttle: {} ms", self.optional.offline.unwrap_or_default(), self.optional.match_file_extension.unwrap_or_default(), root_dir_str, + self.optional.gitignore.unwrap_or_default(), ignore_str.join(","), ignore_path_str, self.optional.throttle.unwrap_or(0) @@ -125,6 +132,33 @@ fn find_all_links(config: &Config) -> Vec { links } +fn find_git_ignored_files() -> Option> { + let output = Command::new("git") + .arg("ls-files") + .arg("--ignored") + .arg("--others") + .arg("--exclude-standard") + .output() + .expect("Failed to execute 'git' command"); + + if output.status.success() { + let ignored_files = String::from_utf8(output.stdout) + .expect("Invalid UTF-8 sequence") + .lines() + .filter(|line| line.ends_with(".md") || line.ends_with(".html")) + .filter_map(|line| 
fs::canonicalize(Path::new(line.trim())).ok()) + .collect::>(); + Some(ignored_files) + } else { + eprintln!( + "git ls-files command failed: {}", + String::from_utf8_lossy(&output.stderr) + ); + None + } +} + + fn print_helper( link: &MarkupLink, status_code: &colored::ColoredString, @@ -168,7 +202,41 @@ pub async fn run(config: &Config) -> Result<(), ()> { Some(s) => s.iter().map(|m| WildMatch::new(m)).collect(), None => vec![], }; + + let gitignored_files: Option> = if config.optional.gitignore.is_some() { + let files = find_git_ignored_files(); + debug!("Found gitignored files: {:?}", files); + files + } else { + None + }; + + let is_gitignore_enabled = gitignored_files.is_some(); + for link in &links { + let canonical_link_source = match fs::canonicalize(&link.source) { + Ok(path) => path, + Err(e) => { + warn!("Failed to canonicalize link source: {}. Error: {:?}", link.source, e); + continue; + } + }; + + if is_gitignore_enabled { + if let Some(ref gif) = gitignored_files { + if gif.iter().any(|path| path == &canonical_link_source) { + print_helper( + link, + &"Skip".green(), + "Ignore link because it is ignored by git.", + false, + ); + skipped += 1; + continue; + } + } + } + if ignore_links.iter().any(|m| m.matches(&link.target)) { print_helper( link, @@ -179,6 +247,7 @@ pub async fn run(config: &Config) -> Result<(), ()> { skipped += 1; continue; } + let link_type = get_link_type(&link.target); let target = resolve_target_link(link, &link_type, config).await; let t = Target { target, link_type }; @@ -190,11 +259,10 @@ pub async fn run(config: &Config) -> Result<(), ()> { } } - let do_not_warn_for_redirect_to: Arc> = - Arc::new(match &config.optional.do_not_warn_for_redirect_to { - Some(s) => s.iter().map(|m| WildMatch::new(m)).collect(), - None => vec![], - }); + let do_not_warn_for_redirect_to: Arc> = Arc::new(match &config.optional.do_not_warn_for_redirect_to { + Some(s) => s.iter().map(|m| WildMatch::new(m)).collect(), + None => vec![], + }); let 
throttle = config.optional.throttle.unwrap_or_default() > 0; info!("Throttle HTTP requests to same host: {:?}", throttle); diff --git a/tests/end_to_end.rs b/tests/end_to_end.rs index 0108f5b..67675c5 100644 --- a/tests/end_to_end.rs +++ b/tests/end_to_end.rs @@ -24,6 +24,7 @@ async fn end_to_end() { fs::canonicalize("./benches/benchmark/markdown/ignore_me_dir").unwrap(), ]), root_dir: None, + gitignore: None, }, }; if let Err(e) = mlc::run(&config).await { @@ -46,6 +47,7 @@ async fn end_to_end_different_root() { ignore_path: None, throttle: None, root_dir: Some(test_files), + gitignore: None, }, }; if let Err(e) = mlc::run(&config).await {