diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
new file mode 100644
index 0000000..7f996c3
--- /dev/null
+++ b/.pre-commit-hooks.yaml
@@ -0,0 +1,7 @@
+- id: urlchecker-check
+  name: urlchecker
+  description: Look for broken URLs in your static files
+  entry: urlchecker-check
+  language: python
+  language_version: python3
+  files: '\.(rst|md|markdown|py|tex)$'
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..d14642a
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,20 @@
+Copyright (c) 2022 Vanessa Sochat and Ayoub Malek
+
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5bf6c6a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,52 @@
+
+
+# urlchecker pre-commit
+
+You can use urlchecker-python with [pre-commit](https://pre-commit.com/)!
+
+## Setup
+
+Add the following entry to your `.pre-commit-config.yaml` in the root of
+your repository:
+
+```yaml
+repos:
+- repo: https://github.com/urlstechie/pre-commit
+  rev: 0.0.0
+  hooks:
+  - id: urlchecker-check
+    additional_dependencies: [urlchecker>=0.0.29]
+```
+
+You can add additional args (those you would add to the check command) to further
+customize the run, for example:
+
+```yaml
+repos:
+- repo: https://github.com/urlstechie/pre-commit
+  rev: 0.0.0
+  hooks:
+  - id: urlchecker-check
+    args: [--retry-count, "3", --timeout, "10"]
+    additional_dependencies: [urlchecker>=0.0.29]
+```
+
+Note that the `--files` argument, which previously accepted file patterns for
+urlchecker, is named `--patterns` for this module. The reason is that pre-commit
+already provides a list of filenames to check verbatim with the commit, and your
+additional specification of `--patterns` is primarily to further filter this list.
+
+## Run
+
+And then you can run:
+
+```bash
+$ pre-commit run
+```
+
+**under development**
+
+## Support
+
+If you need help, or want to suggest a project for the organization,
+please [open an issue](https://github.com/urlstechie/pre-commit)
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..5516726
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,34 @@
+[metadata]
+name = urlchecker_check
+version = 0.0.0
+description = Run urlchecker to check urls in your static files
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://github.com/urlstechie/pre-commit
+author = Vanessa Sochat
+author_email = vsoch@users.noreply.github.com
+license = MIT
+license_file = LICENSE
+classifiers =
+    License :: OSI Approved :: MIT License
+    Programming Language :: Python :: 3
+    Programming Language :: Python :: 3 :: Only
+    Programming Language :: Python :: 3.7
+    Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
+    Programming
Language :: Python :: 3.10 + Programming Language :: Python :: Implementation :: CPython + Programming Language :: Python :: Implementation :: PyPy + +[options] +py_modules = urlchecker_check +install_requires = + urlchecker>=0.0.29 +python_requires = >=3.7 + +[options.entry_points] +console_scripts = + urlchecker-check=urlchecker_check:main + +[bdist_wheel] +universal = True diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..a03590f --- /dev/null +++ b/setup.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +from setuptools import setup + +setup() diff --git a/urlchecker_check.py b/urlchecker_check.py new file mode 100644 index 0000000..b002ccb --- /dev/null +++ b/urlchecker_check.py @@ -0,0 +1,263 @@ +from __future__ import annotations + +# Copyright (c) 2022 Vanessa Sochat and Ayoub Malek +# This source code is licensed under the terms of the MIT license. +# For a copy, see . + +import argparse +import re +import os +import sys +import logging + +from urlchecker.main.github import clone_repo, delete_repo +from urlchecker.core.fileproc import remove_empty +from urlchecker.core.check import UrlChecker +from urlchecker.logger import print_failure + +logger = logging.getLogger("urlchecker") + + +def get_parser(): + # Flatten parser to just be check command + parser = argparse.ArgumentParser(description="urlchecker python pre-commit") + parser.add_argument( + "path", + help="the local path or GitHub repository to clone and check", + ) + + parser.add_argument( + "-b", + "--branch", + help="if cloning, specify a branch to use (defaults to main)", + default="main", + ) + + parser.add_argument( + "--subfolder", + help="relative subfolder path within path (if not specified, we use root)", + ) + + parser.add_argument( + "--cleanup", + help="remove root folder after checking (defaults to False, no cleaup)", + default=False, + action="store_true", + ) + + parser.add_argument( + "--force-pass", + help="force successful pass (return code 0) regardless 
of result", + default=False, + action="store_true", + ) + + parser.add_argument( + "--no-print", + help="Skip printing results to the screen (defaults to printing to console).", + default=False, + action="store_true", + ) + + parser.add_argument( + "--verbose", + help="Print file names for failed urls in addition to the urls.", + default=False, + action="store_true", + ) + + parser.add_argument( + "--file-types", + dest="file_types", + help="comma separated list of file extensions to check (defaults to .md,.py)", + default=".md,.py", + ) + + # Here we separate out filenames (provided by pre-commit) and extra patterns + # to filter over (--patterns) which is --files in the urlchecker + parser.add_argument("filenames", nargs="*") + parser.add_argument( + "--patterns", + dest="patterns", + help="patterns to check.", + default="", + ) + + parser.add_argument( + "--exclude-urls", + help="comma separated links to exclude (no spaces)", + default="", + ) + + parser.add_argument( + "--exclude-patterns", + help="comma separated list of patterns to exclude (no spaces)", + default="", + ) + + parser.add_argument( + "--exclude-files", + help="comma separated list of files and patterns to exclude (no spaces)", + default="", + ) + + # Saving + + parser.add_argument( + "--save", + help="Path to a csv file to save results to.", + default=None, + ) + + # Timeouts + + parser.add_argument( + "--retry-count", + help="retry count upon failure (defaults to 2, one retry).", + type=int, + default=2, + ) + + parser.add_argument( + "--timeout", + help="timeout (seconds) to provide to the requests library (defaults to 5)", + type=int, + default=5, + ) + return parser + + +def check(args): + """ + Main entrypoint for running a check. We expect an args object with + arguments from the main client. 
From here we determine the path + to parse (or GitHub url to clone) and call the main check function + under main/check.py + + Args: + - args : the argparse ArgParser with parsed args + - extra : extra arguments not handled by the parser + """ + path = args.path + + # Case 1: specify present working directory + if not path or path == ".": + path = os.getcwd() + + # Case 2: git clone isn't supported for a pre-commit hook + elif re.search("^(git@|http)", path): + logging.error("Repository url %s detected, not supported for pre-commit hook.") + return 1 + + # Add subfolder to path + if args.subfolder: + path = os.path.join(path, args.subfolder) + + # By the time we get here, a path must exist + if not os.path.exists(path): + logger.error("Error %s does not exist." % path) + return 1 + + logging.debug("Path specified as present working directory, %s" % path) + + # Parse file types, and excluded urls and files (includes absolute and patterns) + file_types = args.file_types.split(",") + exclude_urls = remove_empty(args.exclude_urls.split(",")) + exclude_patterns = remove_empty(args.exclude_patterns.split(",")) + exclude_files = remove_empty(args.exclude_files.split(",")) + + # Do we have any patterns to filter (regular expressions)? + patterns = None + if args.patterns: + logger.debug("Found patterns of files to filter to.") + patterns = "(%s)" % "|".join(args.patterns) + + # Process the files + files = [] + for filename in args.filenames: + if not filename or not os.path.exists(filename): + logger.error("%s does not exist, skipping." 
% filename) + continue + if patterns and not re.search(patterns, filename): + continue + files.append(filename) + + # Alert user about settings + print(" original path: %s" % args.path) + print(" final path: %s" % path) + print(" subfolder: %s" % args.subfolder) + print(" branch: %s" % args.branch) + print(" cleanup: %s" % args.cleanup) + print(" file types: %s" % file_types) + print(" files: %s" % files) + print(" print all: %s" % (not args.no_print)) + print(" verbose: %s" % (args.verbose)) + print(" urls excluded: %s" % exclude_urls) + print(" url patterns excluded: %s" % exclude_patterns) + print(" file patterns excluded: %s" % exclude_files) + print(" force pass: %s" % args.force_pass) + print(" retry count: %s" % args.retry_count) + print(" save: %s" % args.save) + print(" timeout: %s" % args.timeout) + + # Instantiate a new checker with provided arguments + checker = UrlChecker( + path=path, + file_types=file_types, + include_patterns=files, + exclude_files=exclude_files, + print_all=not args.no_print, + ) + check_results = checker.run( + exclude_urls=exclude_urls, + exclude_patterns=exclude_patterns, + retry_count=args.retry_count, + timeout=args.timeout, + ) + + # save results to file, if save indicated + if args.save: + checker.save_results(args.save) + + # Case 1: We didn't find any urls to check + if not check_results["failed"] and not check_results["passed"]: + print("\n\n\U0001F937. No urls were collected.") + return 0 + + # Case 2: We had errors, print them for the user + if check_results["failed"]: + if args.verbose: + print("\n\U0001F914 Uh oh... The following urls did not pass:") + for file_name, result in checker.checks.items(): + if result["failed"]: + print_failure(file_name + ":") + for url in result["failed"]: + print_failure(" " + url) + else: + print("\n\U0001F914 Uh oh... 
The following urls did not pass:") + for failed_url in check_results["failed"]: + print_failure(failed_url) + + # If we have failures and it's not a force pass, exit with 1 + if not args.force_pass and check_results["failed"]: + return 1 + + # Finally, alert user if we are passing conditionally + if check_results["failed"]: + print("\n\U0001F928 Conditional pass force pass True.") + else: + print("\n\n\U0001F389 All URLS passed!") + return 0 + + +def main(argv: Sequence[str] | None = None) -> int: + + parser = get_parser() + args = parser.parse_args(argv) + + # Get the return value to return to pre-commit + return check(args) + + +if __name__ == "__main__": + raise SystemExit(main())