forked from trufflesecurity/trufflehog
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(git): scan commit metadata (trufflesecurity#2713)
This fixes trufflesecurity#2683. It scans the commit author, committer (which is typically GitHub <noreply@github.com> for GitHub, but can be different), and message. It also scans Git notes.
- Loading branch information
Showing 3 changed files with 501 additions and 186 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,7 +22,7 @@ import ( | |
|
||
const ( | ||
// defaultDateFormat is the standard date format for git. | ||
defaultDateFormat = "Mon Jan 02 15:04:05 2006 -0700" | ||
defaultDateFormat = "Mon Jan 2 15:04:05 2006 -0700" | ||
|
||
// defaultMaxDiffSize is the maximum size for a diff. Larger diffs will be cut off. | ||
defaultMaxDiffSize = 2 * 1024 * 1024 * 1024 // 2GB | ||
|
@@ -106,11 +106,12 @@ func (d *Diff) finalize() error { | |
|
||
// Commit contains commit header info and diffs. | ||
type Commit struct { | ||
Hash string | ||
Author string | ||
Date time.Time | ||
Message strings.Builder | ||
Size int // in bytes | ||
Hash string | ||
Author string | ||
Committer string | ||
Date time.Time | ||
Message strings.Builder | ||
Size int // in bytes | ||
|
||
hasDiffs bool | ||
} | ||
|
@@ -131,10 +132,15 @@ const ( | |
CommitLine | ||
MergeLine | ||
AuthorLine | ||
DateLine | ||
AuthorDateLine | ||
CommitterLine | ||
CommitterDateLine | ||
MessageStartLine | ||
MessageLine | ||
MessageEndLine | ||
NotesStartLine | ||
NotesLine | ||
NotesEndLine | ||
DiffLine | ||
ModeLine | ||
IndexLine | ||
|
@@ -152,10 +158,15 @@ func (state ParseState) String() string { | |
"CommitLine", | ||
"MergeLine", | ||
"AuthorLine", | ||
"DateLine", | ||
"AuthorDateLine", | ||
"CommitterLine", | ||
"CommitterDateLine", | ||
"MessageStartLine", | ||
"MessageLine", | ||
"MessageEndLine", | ||
"NotesStartLine", | ||
"NotesLine", | ||
"NotesEndLine", | ||
"DiffLine", | ||
"ModeLine", | ||
"IndexLine", | ||
|
@@ -209,7 +220,15 @@ func NewParser(options ...Option) *Parser { | |
// RepoPath parses the output of the `git log` command for the `source` path. | ||
// The Diff chan will return diffs in the order they are parsed from the log. | ||
func (c *Parser) RepoPath(ctx context.Context, source string, head string, abbreviatedLog bool, excludedGlobs []string, isBare bool) (chan *Diff, error) { | ||
args := []string{"-C", source, "log", "-p", "--full-history", "--date=format:%a %b %d %H:%M:%S %Y %z"} | ||
args := []string{ | ||
"-C", source, | ||
"log", | ||
"--patch", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---patch | ||
"--full-history", | ||
"--date=format:%a %b %d %H:%M:%S %Y %z", | ||
"--pretty=fuller", // https://git-scm.com/docs/git-log#_pretty_formats | ||
"--notes", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---notesltrefgt | ||
} | ||
if abbreviatedLog { | ||
args = append(args, "--diff-filter=AM") | ||
} | ||
|
@@ -373,16 +392,23 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan | |
latestState = MergeLine | ||
case isAuthorLine(isStaged, latestState, line): | ||
latestState = AuthorLine | ||
currentCommit.Author = strings.TrimRight(string(line[8:]), "\n") | ||
|
||
case isDateLine(isStaged, latestState, line): | ||
latestState = DateLine | ||
currentCommit.Author = strings.TrimSpace(string(line[8:])) | ||
case isAuthorDateLine(isStaged, latestState, line): | ||
latestState = AuthorDateLine | ||
|
||
date, err := time.Parse(c.dateFormat, strings.TrimSpace(string(line[6:]))) | ||
date, err := time.Parse(c.dateFormat, strings.TrimSpace(string(line[12:]))) | ||
if err != nil { | ||
ctx.Logger().V(2).Info("Could not parse date from git stream.", "error", err) | ||
ctx.Logger().Error(err, "failed to parse commit date", "commit", currentCommit.Hash, "latestState", latestState.String()) | ||
latestState = ParseFailure | ||
continue | ||
} | ||
currentCommit.Date = date | ||
case isCommitterLine(isStaged, latestState, line): | ||
latestState = CommitterLine | ||
currentCommit.Committer = strings.TrimSpace(string(line[8:])) | ||
case isCommitterDateLine(isStaged, latestState, line): | ||
latestState = CommitterDateLine | ||
// NoOp | ||
case isMessageStartLine(isStaged, latestState, line): | ||
latestState = MessageStartLine | ||
// NoOp | ||
|
@@ -393,6 +419,17 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan | |
case isMessageEndLine(isStaged, latestState, line): | ||
latestState = MessageEndLine | ||
// NoOp | ||
case isNotesStartLine(isStaged, latestState, line): | ||
latestState = NotesStartLine | ||
|
||
currentCommit.Message.WriteString("\n") | ||
currentCommit.Message.Write(line) | ||
case isNotesLine(isStaged, latestState, line): | ||
latestState = NotesLine | ||
currentCommit.Message.Write(line[4:]) // Notes are indented by 4 spaces. | ||
case isNotesEndLine(isStaged, latestState, line): | ||
latestState = NotesEndLine | ||
// NoOp | ||
case isDiffLine(isStaged, latestState, line): | ||
latestState = DiffLine | ||
|
||
|
@@ -577,20 +614,42 @@ func isAuthorLine(isStaged bool, latestState ParseState, line []byte) bool { | |
return false | ||
} | ||
|
||
// Date: Tue Aug 10 15:20:40 2021 +0100 | ||
func isDateLine(isStaged bool, latestState ParseState, line []byte) bool { | ||
// AuthorDate: Tue Aug 10 15:20:40 2021 +0100 | ||
func isAuthorDateLine(isStaged bool, latestState ParseState, line []byte) bool { | ||
if isStaged || latestState != AuthorLine { | ||
return false | ||
} | ||
if len(line) > 7 && bytes.Equal(line[:5], []byte("Date:")) { | ||
if len(line) > 10 && bytes.Equal(line[:11], []byte("AuthorDate:")) { | ||
return true | ||
} | ||
return false | ||
} | ||
|
||
// Commit: Bill Rich <[email protected]> | ||
func isCommitterLine(isStaged bool, latestState ParseState, line []byte) bool { | ||
if isStaged || latestState != AuthorDateLine { | ||
return false | ||
} | ||
if len(line) > 8 && bytes.Equal(line[:7], []byte("Commit:")) { | ||
return true | ||
} | ||
return false | ||
} | ||
|
||
// Line directly after Date with only a newline. | ||
// CommitDate: Wed Apr 17 19:59:28 2024 -0400 | ||
func isCommitterDateLine(isStaged bool, latestState ParseState, line []byte) bool { | ||
if isStaged || latestState != CommitterLine { | ||
return false | ||
} | ||
if len(line) > 10 && bytes.Equal(line[:11], []byte("CommitDate:")) { | ||
return true | ||
} | ||
return false | ||
} | ||
|
||
// Line directly after CommitterDate with only a newline. | ||
func isMessageStartLine(isStaged bool, latestState ParseState, line []byte) bool { | ||
if isStaged || latestState != DateLine { | ||
if isStaged || latestState != CommitterDateLine { | ||
return false | ||
} | ||
// TODO: Improve the implementation of this and isMessageEndLine | ||
|
@@ -622,15 +681,51 @@ func isMessageEndLine(isStaged bool, latestState ParseState, line []byte) bool { | |
return false | ||
} | ||
|
||
// `Notes:` or `Notes (context):` | ||
// See https://tylercipriani.com/blog/2022/11/19/git-notes-gits-coolest-most-unloved-feature/ | ||
func isNotesStartLine(isStaged bool, latestState ParseState, line []byte) bool { | ||
if isStaged || latestState != MessageEndLine { | ||
return false | ||
} | ||
if len(line) > 5 && bytes.Equal(line[:5], []byte("Notes")) { | ||
return true | ||
} | ||
return false | ||
} | ||
|
||
// Line after NotesStartLine that starts with 4 spaces | ||
func isNotesLine(isStaged bool, latestState ParseState, line []byte) bool { | ||
if isStaged || !(latestState == NotesStartLine || latestState == NotesLine) { | ||
return false | ||
} | ||
if len(line) > 4 && bytes.Equal(line[:4], []byte(" ")) { | ||
return true | ||
} | ||
return false | ||
} | ||
|
||
// Line directly after NotesLine with only a newline. | ||
func isNotesEndLine(isStaged bool, latestState ParseState, line []byte) bool { | ||
if isStaged || latestState != NotesLine { | ||
return false | ||
} | ||
if len(strings.TrimRight(string(line[:]), "\r\n")) == 0 { | ||
return true | ||
} | ||
return false | ||
} | ||
|
||
// diff --git a/internal/addrs/move_endpoint_module.go b/internal/addrs/move_endpoint_module.go | ||
func isDiffLine(isStaged bool, latestState ParseState, line []byte) bool { | ||
if !(latestState == MessageStartLine || // Empty commit messages can go from MessageStart->Diff | ||
latestState == MessageEndLine || | ||
latestState == NotesEndLine || | ||
latestState == BinaryFileLine || | ||
latestState == ModeLine || | ||
latestState == IndexLine || | ||
latestState == HunkContentLine || | ||
latestState == ParseFailure) { | ||
if latestState == Initial && !isStaged { | ||
if !(isStaged && latestState == Initial) { | ||
return false | ||
} | ||
} | ||
|
Oops, something went wrong.