Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pr and issue comment scanning capabilities #1

Open
wants to merge 1 commit into
base: v2-dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
/truffleHog.egg-info/
**/__pycache__/
**/*.pyc
env/
env/
.vscode
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ Have questions? Feedback? Jump in slack and hang out with me
https://join.slack.com/t/trufflehog-community/shared_invite/zt-pw2qbi43-Aa86hkiimstfdKH9UCpPzQ

## NEW
truffleHog now has the ability to scan comments in both Pull Requests and Issues.

```
trufflehog --regex --entropy=False --pr_and_issue_comments --github_username=<username> --github_token=<personal_access_token> https://github.com/trufflesecurity/trufflehog.git
```

truffleHog previously functioned by running entropy checks on git diffs. This functionality still exists, but high signal regex checks have been added, and the ability to suppress entropy checking has also been added.


Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
GitPython==3.0.6
unittest2==1.1.0
pytest-cov==2.5.1
codecov==2.0.15
codecov==2.1.12
truffleHogRegexes==0.0.7
ratelimiter==1.2.0
126 changes: 126 additions & 0 deletions truffleHog/truffleHog.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
import tempfile
import os
import re
import requests
import json
import stat
from git import Repo
from git import NULL_TREE
from ratelimiter import RateLimiter
from truffleHogRegexes.regexChecks import regexes


Expand All @@ -42,6 +44,9 @@ def main():
'effectively excluded via the --include_paths option.')
parser.add_argument("--repo_path", type=str, dest="repo_path", help="Path to the cloned repo. If provided, git_url will not be used")
parser.add_argument("--cleanup", dest="cleanup", action="store_true", help="Clean up all temporary result files")
parser.add_argument("--pr_and_issue_comments", dest="do_comments", action="store_true", help="Enable PR & Issue Comment checks")
parser.add_argument("--github_username", dest="github_username", help="GitHub Username necessary for analysing PR & Issue Comments")
parser.add_argument("--github_token", dest="github_token", help="GitHub Personal Access Token necessary for analysing PR & Issue Comments")
parser.add_argument('git_url', type=str, help='URL for secret searching')
parser.set_defaults(regex=False)
parser.set_defaults(rules={})
Expand All @@ -52,6 +57,7 @@ def main():
parser.set_defaults(branch=None)
parser.set_defaults(repo_path=None)
parser.set_defaults(cleanup=False)
parser.set_defaults(do_comments=False)
args = parser.parse_args()
rules = {}
if args.rules:
Expand Down Expand Up @@ -91,6 +97,17 @@ def main():

output = find_strings(args.git_url, args.since_commit, args.max_depth, args.output_json, args.do_regex, do_entropy,
surpress_output=False, custom_regexes=regexes, branch=args.branch, repo_path=args.repo_path, path_inclusions=path_inclusions, path_exclusions=path_exclusions, allow=allow)

if args.do_comments:
if args.github_username == "":
raise(Exception("GitHub comment scanning requires a --github_username to be provided"))

if args.github_token == "":
raise(Exception("GitHub comment scanning requires a --github_token to be provided"))

commentIssues = find_strings_in_comments(args.git_url, args.github_username, args.github_token)
handle_results(output, output["issues_path"], commentIssues)

project_path = output["project_path"]
if args.cleanup:
clean_up(output)
Expand Down Expand Up @@ -183,6 +200,8 @@ def print_results(printJson, issue):
print(json.dumps(issue, sort_keys=True))
else:
print("~~~~~~~~~~~~~~~~~~~~~")
type = "{}Type: {}{}".format(bcolors.OKGREEN, "Commit", bcolors.ENDC)
print(type)
reason = "{}Reason: {}{}".format(bcolors.OKGREEN, reason, bcolors.ENDC)
print(reason)
dateStr = "{}Date: {}{}".format(bcolors.OKGREEN, commit_time, bcolors.ENDC)
Expand All @@ -205,6 +224,25 @@ def print_results(printJson, issue):
print(commitStr)
print(printableDiff.encode('utf-8'))
print("~~~~~~~~~~~~~~~~~~~~~")

def print_comment_results(printJson, issue):
if printJson:
print(json.dumps(issue, sort_keys=True))
else:
print("~~~~~~~~~~~~~~~~~~~~~")
type = "{}Type: {}{}".format(bcolors.OKGREEN, issue['type'], bcolors.ENDC)
print(type)
reason = "{}Reason: {}{}".format(bcolors.OKGREEN, issue['reason'], bcolors.ENDC)
print(reason)
createdDateStr = "{}Date: {}{}".format(bcolors.OKGREEN, issue['createdAt'], bcolors.ENDC)
print(createdDateStr)
commentURLStr = "{}URL: {}{}".format(bcolors.OKGREEN, issue['htmlUrl'], bcolors.ENDC)
print(commentURLStr)
blameStr = "{}Author: {}{}".format(bcolors.OKGREEN, f"{issue['blameUsername']} [{issue['blameId']}]", bcolors.ENDC)
print(blameStr)
commentBodyStr = "{}Comment: \n{}{}".format(bcolors.OKGREEN, issue['printDiff'], bcolors.ENDC)
print(commentBodyStr)
print("~~~~~~~~~~~~~~~~~~~~~")

def find_entropy(printableDiff, commit_time, branch_name, prev_commit, blob, commitHash):
stringsFound = []
Expand Down Expand Up @@ -261,6 +299,35 @@ def regex_check(printableDiff, commit_time, branch_name, prev_commit, blob, comm
regex_matches.append(foundRegex)
return regex_matches

def comment_regex_check(prComment, type, custom_regexes={}):
if custom_regexes:
secret_regexes = custom_regexes
else:
secret_regexes = regexes

regex_matches = []

if prComment["body"] == "":
return regex_matches

for key in secret_regexes:
found_strings = secret_regexes[key].findall(prComment["body"])
for found_string in found_strings:
found_diff = prComment["body"].replace(found_string, bcolors.ENDC + bcolors.WARNING + str(found_string) + bcolors.ENDC + bcolors.OKGREEN)
if found_strings:
foundRegex = {}
foundRegex['type'] = type
foundRegex['createdAt'] = prComment["created_at"]
foundRegex['updatedAt'] = prComment["updated_at"]
foundRegex['htmlUrl'] = prComment["html_url"]
foundRegex['blameId'] = prComment["user"]["id"]
foundRegex['blameUsername'] = prComment["user"]["login"]
foundRegex['stringsFound'] = found_strings
foundRegex['printDiff'] = found_diff
foundRegex['reason'] = key
regex_matches.append(foundRegex)
return regex_matches

def diff_worker(diff, curr_commit, prev_commit, branch_name, commitHash, custom_regexes, do_entropy, do_regex, printJson, surpress_output, path_inclusions, path_exclusions, allow):
issues = []
for blob in diff:
Expand Down Expand Up @@ -319,6 +386,65 @@ def path_included(blob, include_patterns=None, exclude_patterns=None):
return False
return True

@RateLimiter(max_calls=1000, period=60)
def get_prs(baseProjectUrl, authHeaders):
response = requests.get(f'{baseProjectUrl}/pulls?state=all&per_page=100&page=1', auth=authHeaders)
response.raise_for_status()
prs = response.json()

while 'next' in response.links.keys():
response=requests.get(response.links['next']['url'], auth=authHeaders)
response.raise_for_status()
prs.extend(response.json())

return prs

@RateLimiter(max_calls=1000, period=60)
def get_issues(baseProjectUrl, authHeaders):
response = requests.get(f'{baseProjectUrl}/issues?state=all&per_page=100&page=1', auth=authHeaders)
response.raise_for_status()
issues = response.json()

while 'next' in response.links.keys():
response=requests.get(response.links['next']['url'], auth=authHeaders)
response.raise_for_status()
issues.extend(response.json())

return issues

def find_strings_in_comments(github_url, github_username, github_token, printJson=False):
parts = github_url.split("/")
owner = parts[-2]
repo = parts[-1].split(".")[0]
repoUrl=f'https://api.github.com/repos/{owner}/{repo}'
auth=(github_username, github_token)
regex_matches = []

prs = get_prs(repoUrl, auth)
for pr in prs:
response = requests.get(pr["review_comments_url"], auth=auth)
response.raise_for_status()
pr_comments = response.json()

for pr_comment in pr_comments:
regex_matches.extend(comment_regex_check(pr_comment, "PR Comment"))

issues = get_issues(repoUrl, auth)
for issue in issues:
if issue["comments"] == 0:
continue

response = requests.get(issue["comments_url"], auth=auth)
response.raise_for_status()
issue_comments = response.json()

for issue_comment in issue_comments:
regex_matches.extend(comment_regex_check(issue_comment, "Issue Comment"))

for match in regex_matches:
print_comment_results(printJson, match)

return regex_matches

def find_strings(git_url, since_commit=None, max_depth=1000000, printJson=False, do_regex=False, do_entropy=True, surpress_output=True,
custom_regexes={}, branch=None, repo_path=None, path_inclusions=None, path_exclusions=None, allow={}):
Expand Down