Skip to content

Commit

Permalink
support SARIF format
Browse files Browse the repository at this point in the history
  • Loading branch information
David Wertenteil committed May 29, 2024
1 parent d0e7560 commit c16e63c
Show file tree
Hide file tree
Showing 8 changed files with 271 additions and 14 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: Build and Deploy containers

on:
# Always test on pull request
pull_request: []
pull_request:

# Deploy on merge to main
push:
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ on:
branches:
- master
pull_request:
branches_ignore: []

jobs:
formatting:
Expand Down
57 changes: 55 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ $ urlchecker check --help
usage: urlchecker check [-h] [-b BRANCH] [--subfolder SUBFOLDER] [--cleanup] [--serial] [--no-check-certs]
[--force-pass] [--no-print] [--verbose] [--file-types FILE_TYPES] [--files FILES]
[--exclude-urls EXCLUDE_URLS] [--exclude-patterns EXCLUDE_PATTERNS]
[--exclude-files EXCLUDE_FILES] [--save SAVE] [--retry-count RETRY_COUNT] [--timeout TIMEOUT]
[--exclude-files EXCLUDE_FILES] [--save SAVE] [--format FORMAT] [--retry-count RETRY_COUNT] [--timeout TIMEOUT]
path

positional arguments:
Expand Down Expand Up @@ -89,6 +89,7 @@ options:
--exclude-files EXCLUDE_FILES
comma separated list of files and patterns to exclude (no spaces)
--save SAVE Path to a csv file to save results to.
--format FORMAT format to save results to (csv or sarif), defaults to csv.
--retry-count RETRY_COUNT
retry count upon failure (defaults to 2, one retry).
--timeout TIMEOUT timeout (seconds) to provide to the requests library (defaults to 5)
Expand Down Expand Up @@ -266,7 +267,9 @@ $ urlchecker check --exclude-files=README.md,_config.yml

### Save Results

If you want to save your results to file, perhaps for some kind of record or
#### Save results in CSV format

If you want to save your results to a file in CSV format, perhaps for some kind of record or
other data analysis, you can provide the `--save` argument:

```bash
Expand Down Expand Up @@ -313,6 +316,56 @@ https://github.com/SuperKogito/URLs-checker/issues/1,failed
https://github.com/SuperKogito/URLs-checker/issues/4,failed
```

#### Save results in SARIF format

To save results in SARIF format, you can provide the `--format` argument with `sarif`:

```bash
$ urlchecker check --save results.sarif --format sarif .
```

This will produce a SARIF file with detailed information about each URL, including the exact line in the code where the URL was found, which is useful for integrating with tools that support SARIF for static analysis.
This output helps in pinpointing the exact issues directly in the code, improving the efficiency of addressing broken links.

```json
{
"version": "2.1.0",
"runs": [
{
"tool": {
"driver": {
"name": "UrlChecker",
"informationUri": "https://github.com/urlstechie/urlchecker-python",
"rules": [
{
"id": "URL001",
"name": "Invalid URL",
"shortDescription": { "text": "This URL is invalid or unreachable." },
"fullDescription": { "text": "This URL is invalid or unreachable." },
"helpUri": "https://example.com/rule/url001"
}
]
}
},
"results": [
{
"ruleId": "URL001",
"message": { "text": "URL https://github.com/SuperKogito/URLs-checker/README.md is invalid or unreachable." },
"locations": [
{
"physicalLocation": {
"artifactLocation": { "uri": "example_file.py" },
"region": { "startLine": 10 }
}
}
]
},
...
]
}
]
}
```

### Usage from Python

Expand Down
52 changes: 52 additions & 0 deletions tests/test_client_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,3 +219,55 @@ def test_client_save(save):
if save:
if not os.path.exists(output_csv.name):
raise AssertionError

@pytest.mark.parametrize("save, output_format", [(True, ""), (True, "csv"), (True, "sarif")])
def test_client_save_format_csv(save, output_format):
    """Run the urlchecker CLI with --save and each supported --format value.

    Despite the historical name, this covers the default (empty string),
    "csv", and "sarif" formats, and verifies that a non-empty results file
    is written by the checker.
    """

    # init config parser
    config = configparser.ConfigParser()
    config.read("./tests/_local_test_config.conf")

    # init env variables
    path = config["DEFAULT"]["git_path_test_value"]
    file_types = config["DEFAULT"]["file_types_test_values"]
    exclude_urls = config["DEFAULT"]["exclude_test_urls"]
    exclude_patterns = config["DEFAULT"]["exclude_test_patterns"]

    # Generate command
    cmd = [
        "urlchecker",
        "check",
        "--subfolder",
        "test_files",
        "--file-types",
        file_types,
        "--exclude-files",
        "conf.py",
        "--exclude-urls",
        exclude_urls,
        # Bug fix: the CLI defines --exclude-patterns (hyphenated); the
        # previous --exclude_patterns spelling is not a recognized option.
        "--exclude-patterns",
        exclude_patterns,
    ]

    # Map the requested format to the saved file suffix ("" means default csv)
    suffix = {
        "csv": ".csv",
        "sarif": ".sarif",
        "": ".csv",
    }
    # Write to file
    if save:
        output_file = tempfile.NamedTemporaryFile(
            suffix=suffix[output_format], prefix="urlchecker-"
        )
        cmd += ["--save", output_file.name]
        if output_format:
            cmd += ["--format", output_format]

    # Add final path
    cmd.append(path)

    print(" ".join(cmd))
    # execute script
    pipe = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if save:
        # NamedTemporaryFile pre-creates the file, so existence alone proves
        # nothing -- also require that the checker actually wrote results.
        if not os.path.exists(output_file.name):
            raise AssertionError
        if os.path.getsize(output_file.name) == 0:
            raise AssertionError

74 changes: 72 additions & 2 deletions tests/test_core_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import re
import sys
import pytest
import json
import configparser
from urlchecker.core.fileproc import get_file_paths
from urlchecker.main.github import clone_repo
Expand Down Expand Up @@ -95,9 +96,8 @@ def test_locally(local_folder_path, config_fname):
)
print("Done.")


@pytest.mark.parametrize("retry_count", [1, 3])
def test_check_run_save(tmp_path, retry_count):
def test_check_run_save_csv(tmp_path, retry_count):

# init vars
git_path = "https://github.com/urlstechie/urlchecker-test-repo"
Expand Down Expand Up @@ -161,3 +161,73 @@ def test_check_run_save(tmp_path, retry_count):
for line in lines[1:]:
url, result, filename = line.split(",")
assert not filename.startswith(root)

@pytest.mark.parametrize("retry_count", [1, 3])
def test_check_run_save_sarif(tmp_path, retry_count):
    """Clone the test repository, run a URL check configured for SARIF
    output, save the report, and validate the SARIF 2.1.0 document shape."""

    # Repository and filtering configuration for the run
    git_path = "https://github.com/urlstechie/urlchecker-test-repo"
    file_types = [".py", ".md"]
    print_all = True
    exclude_urls = [
        "https://superkogito.github.io/figures/fig2.html",
        "https://superkogito.github.io/figures/fig4.html",
    ]
    exclude_patterns = ["https://superkogito.github.io/tables"]
    timeout = 1
    force_pass = False

    # Clone the repo, then narrow down to the test_files subfolder
    base_path = os.path.join(clone_repo(git_path), "test_files")
    file_paths = get_file_paths(base_path, file_types)

    # Run the checker with SARIF output selected
    checker = UrlChecker(print_all=print_all, save_results_format="sarif")
    run_results = checker.run(
        file_paths=file_paths,
        exclude_urls=exclude_urls,
        exclude_patterns=exclude_patterns,
        retry_count=retry_count,
        timeout=timeout,
    )

    # Saving should create the file exactly once
    output_file = os.path.join(str(tmp_path), "results.sarif")
    assert not os.path.exists(output_file)
    saved_file = checker.save_results(output_file)
    assert os.path.exists(output_file)

    # Parse the saved report as JSON
    with open(saved_file, "r") as handle:
        report = json.load(handle)

    # Top-level SARIF document checks
    assert "version" in report
    assert report["version"] == "2.1.0"
    assert "runs" in report
    assert len(report["runs"]) > 0

    # Tool metadata for the first run
    first_run = report["runs"][0]
    assert "tool" in first_run
    assert "driver" in first_run["tool"]
    driver = first_run["tool"]["driver"]
    assert "name" in driver
    assert driver["name"] == "UrlChecker"

    # At least one result entry must be present
    assert "results" in first_run
    assert len(first_run["results"]) > 0

    # Structure of a single result entry
    entry = first_run["results"][0]
    assert "ruleId" in entry
    assert entry["ruleId"] == "URL001"
    assert "message" in entry
    assert "text" in entry["message"]
    assert "locations" in entry
    assert len(entry["locations"]) > 0
    location = entry["locations"][0]
    assert "physicalLocation" in location
    physical = location["physicalLocation"]
    assert "artifactLocation" in physical
    assert "uri" in physical["artifactLocation"]
    assert "region" in physical
    assert "startLine" in physical["region"]
9 changes: 8 additions & 1 deletion urlchecker/client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,16 @@ def get_parser():

check.add_argument(
"--save",
help="Path to a csv file to save results to.",
help="Path to file to save results to.",
default=None,
)
check.add_argument(
"--format",
help="File format to save results to.",
default="csv",
choices=["csv", "sarif"],
type=str,
)

# Timeouts

Expand Down
3 changes: 3 additions & 0 deletions urlchecker/client/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def main(args, extra):
print(" force pass: %s" % args.force_pass)
print(" retry count: %s" % args.retry_count)
print(" save: %s" % args.save)
print(" format: %s" % args.format)
print(" timeout: %s" % args.timeout)

# Instantiate a new checker with provided arguments
Expand All @@ -86,6 +87,8 @@ def main(args, extra):
exclude_files=exclude_files,
print_all=not args.no_print,
serial=args.serial,
save_results_format=args.format,

)
check_results = checker.run(
exclude_urls=exclude_urls,
Expand Down
Loading

0 comments on commit c16e63c

Please sign in to comment.