Skip to content

Commit

Permalink
add simple regex support
Browse files Browse the repository at this point in the history
  • Loading branch information
noboruma committed Jul 2, 2024
1 parent dc4a601 commit 720c103
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 150 deletions.
3 changes: 0 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,6 @@ func main() {
// Process and store the read signatures
signature.ProcessSignatures(session.Config.Signatures)

// Build Hyperscan database for fast scanning
signature.BuildRegexes()

flag.Parse()

if *core.GetSession().Options.Debug {
Expand Down
18 changes: 15 additions & 3 deletions scan/scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"io"
"path/filepath"
"sync"

"github.com/deepfence/SecretScanner/output"
"github.com/deepfence/SecretScanner/signature"
Expand Down Expand Up @@ -38,11 +39,17 @@ func ScanTypeString(st ScanType) string {

// scanFile runs the simple-signature matcher and then the pattern-signature
// matcher over a single extracted file and returns the findings of both.
//
// contents is handed to both matchers, which rewind it themselves. relPath,
// fileName, fileExtension and layer are forwarded unchanged, as are the
// numSecrets counter and the matchedRuleSet map.
//
// The first matcher error aborts the scan of this file and is returned with a
// nil slice.
func scanFile(contents io.ReadSeeker, relPath, fileName, fileExtension, layer string, numSecrets *uint, matchedRuleSet map[uint]uint) ([]output.SecretFound, error) {
	simpleSecrets, err := signature.MatchSimpleSignatures(contents, relPath, fileName, fileExtension, layer, numSecrets, matchedRuleSet)
	if err != nil {
		return nil, err
	}

	secrets, err := signature.MatchPatternSignatures(contents, relPath, fileName, fileExtension, layer, numSecrets, matchedRuleSet)
	if err != nil {
		return nil, err
	}

	// Return both result sets. An earlier bare `return secrets, nil` made this
	// statement unreachable and silently dropped the simple-signature findings.
	return append(simpleSecrets, secrets...), nil
}

func Scan(ctx *tasks.ScanContext,
Expand Down Expand Up @@ -72,16 +79,17 @@ func Scan(ctx *tasks.ScanContext,
// results has to be 1 element max
// to avoid overwriting the buffer entries
results := make(chan []output.SecretFound)
defer close(results)

wg := sync.WaitGroup{}
wg.Add(1)
go func() {
defer wg.Done()
for malwares := range results {
for _, malware := range malwares {
outputFn(malware, scanID)
}
}
}()

genscan.ApplyScan(context.Background(), extract, func(f extractor.ExtractedFile) {
if ctx != nil {
err := ctx.Checkpoint("scan_phase")
Expand All @@ -98,5 +106,9 @@ func Scan(ctx *tasks.ScanContext,

results <- m
})

close(results)
wg.Wait()

return nil
}
218 changes: 74 additions & 144 deletions signature/signatures.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ import (
// "regexp/syntax"
// "strings"
"bufio"
"bytes"
"errors"
"io"
"math"
"regexp"
Expand All @@ -29,30 +27,21 @@ const (
MaxSecretLength = 1000 // Maximum length of secret to search to find exact position of secrets in large regex patterns
)

// HsInputOutputData bundles the bytes being matched with the metadata copied
// onto findings (file name, layer ID) and the places results are accumulated.
// NOTE(review): "Hs" presumably refers to Hyperscan, which the helper comments
// in this file mention - confirm.
type HsInputOutputData struct {
// Raw bytes being matched; the match helpers slice it by match offsets.
inputData []byte
// Avoids extra memory during blacklist comparison, reduces memory pressure
// NOTE(review): presumably a lower-cased copy of inputData - confirm against callers.
inputDataLowerCase []byte
// File name reported as CompleteFilename on findings.
completeFilename string
// Layer ID reported as LayerID on findings.
layerID string
// Pointer to the caller's slice of findings.
// NOTE(review): presumably appended to as matches are reported - confirm.
secretsFound *[]output.SecretFound
// NOTE(review): presumably a running count of secrets found - confirm against callers.
numSecrets *uint
matchedRuleSet map[uint]uint // Indicates if any rules matched in the last iteration
}

// Different map data structures to map to appropriate signatures, DBs etc.
// The string-keyed maps are keyed by the part of a file being matched
// (ContentsPart, FilenamePart, PathPart or ExtPart).
var (
// part -> simple signatures registered for that part.
simpleSignatureMap map[string][]core.ConfigSignature
// part -> one regexp compiled from that part's simple match strings joined with "|".
matchRegexpMap map[string]*regexp.Regexp
// part -> pattern (regex) signatures registered for that part.
patternSignatureMap map[string][]core.ConfigSignature
// rule ID -> signature.
signatureIDMap map[int]core.ConfigSignature
// part -> simple match string -> signature that declared it.
matchSignatureMap map[string]map[string]core.ConfigSignature
)

// init allocates every package-level lookup table so that later code can
// write to them without first checking for nil.
//
// matchSignatureMap is a map of maps: the inner map of each known part is
// allocated here as well, because ProcessSignatures assigns directly into
// matchSignatureMap[part][match] and assigning into a nil inner map panics.
func init() {
	// log.Infof("Initializing Patterns....")
	simpleSignatureMap = make(map[string][]core.ConfigSignature)
	patternSignatureMap = make(map[string][]core.ConfigSignature)
	signatureIDMap = make(map[int]core.ConfigSignature)
	matchRegexpMap = make(map[string]*regexp.Regexp)
	matchSignatureMap = make(map[string]map[string]core.ConfigSignature)
	for _, part := range []string{ContentsPart, FilenamePart, PathPart, ExtPart} {
		matchSignatureMap[part] = make(map[string]core.ConfigSignature)
	}
}

// Scan to find complex pattern matches for the contents, path, filename and extension of this file
Expand All @@ -68,11 +57,11 @@ func init() {
func MatchPatternSignatures(contents io.ReadSeeker, path string, filename string, extension string, layerID string,
numSecrets *uint, matchedRuleSet map[uint]uint) ([]output.SecretFound, error) {
var tempSecretsFound []output.SecretFound
//var hsIOData HsInputOutputData
var matchingPart string
var matchingStr io.RuneReader

for _, part := range []string{ContentsPart, FilenamePart, PathPart, ExtPart} {
contents.Seek(0, io.SeekStart)
switch part {
case FilenamePart:
matchingPart = part
Expand All @@ -88,15 +77,6 @@ func MatchPatternSignatures(contents io.ReadSeeker, path string, filename string
matchingStr = bufio.NewReader(contents)
}

//hsIOData = HsInputOutputData{
// inputData: matchingStr,
// inputDataLowerCase: bytes.ToLower(matchingStr),
// completeFilename: path,
// layerID: layerID,
// secretsFound: &tempSecretsFound,
// numSecrets: numSecrets,
// matchedRuleSet: matchedRuleSet,
//}
for _, regex := range patternSignatureMap[matchingPart] {
indexes := regex.CompiledRegex.FindReaderSubmatchIndex(matchingStr)
if indexes != nil {
Expand Down Expand Up @@ -128,15 +108,69 @@ func MatchPatternSignatures(contents io.ReadSeeker, path string, filename string
return tempSecretsFound, nil
}

// MatchSimpleSignatures matches the contents, filename, path and extension of
// a file against the combined simple-signature regexp of each part.
//
// Parts are tried in the order contents, filename, path, extension, and the
// scan stops at the first part that produces a finding, so at most one
// SecretFound is returned.
//
// @parameters
// contents - Seekable file contents; its read offset is moved by this call
// path - Path of the file
// filename - Name of the file
// extension - Extension of the file
// layerID - Layer ID copied onto the finding
// numSecrets - Not used by this matcher
// matchedRuleSet - Not used by this matcher
// @returns
// []output.SecretFound - The finding, if any
// error - Non-nil when contents cannot be rewound to its start
func MatchSimpleSignatures(contents io.ReadSeeker, path string, filename string, extension string, layerID string,
	numSecrets *uint, matchedRuleSet map[uint]uint) ([]output.SecretFound, error) {
	var tempSecretsFound []output.SecretFound

	for _, part := range []string{ContentsPart, FilenamePart, PathPart, ExtPart} {
		re := matchRegexpMap[part]
		if re == nil {
			// No regexp has been compiled for this part; nothing to match.
			continue
		}

		// text holds the string being matched for the non-content parts so the
		// matched substring can be sliced out of it afterwards.
		var text string
		var matchingStr io.RuneReader
		fromContents := false
		switch part {
		case FilenamePart:
			text = filename
			matchingStr = strings.NewReader(text)
		case PathPart:
			text = path
			matchingStr = strings.NewReader(text)
		case ExtPart:
			text = extension
			matchingStr = strings.NewReader(text)
		case ContentsPart:
			if _, err := contents.Seek(0, io.SeekStart); err != nil {
				return nil, err
			}
			fromContents = true
			matchingStr = bufio.NewReader(contents)
		}

		indexes := re.FindReaderSubmatchIndex(matchingStr)
		// A regexp built from zero signatures is the empty pattern and matches
		// the empty string at offset 0; that is not a finding.
		if len(indexes) < 2 || indexes[1] <= indexes[0] {
			continue
		}

		var matchStr string
		if fromContents {
			// The regexp consumed a reader, so the matched bytes have to be
			// re-read from the underlying contents at the reported offsets.
			match := make([]byte, indexes[1]-indexes[0])
			if _, err := contents.Seek(int64(indexes[0]), io.SeekStart); err != nil {
				logrus.Infof("content read: %v", err)
				continue
			}
			if _, err := io.ReadFull(contents, match); err != nil {
				logrus.Infof("content read: %v", err)
				continue
			}
			matchStr = string(match)
		} else {
			// Offsets refer to the filename/path/extension string itself, not
			// to the file contents.
			matchStr = text[indexes[0]:indexes[1]]
		}

		// Only report text that maps back to a configured signature; anything
		// else cannot be attributed to a rule.
		signature, ok := matchSignatureMap[part][matchStr]
		if !ok {
			continue
		}

		tempSecretsFound = append(tempSecretsFound, output.SecretFound{
			LayerID:          layerID,
			RuleID:           signature.ID,
			RuleName:         signature.Name,
			PartToMatch:      part,
			Match:            matchStr,
			Regex:            signature.Regex,
			Severity:         signature.Severity,
			SeverityScore:    signature.SeverityScore,
			MatchFromByte:    indexes[0],
			MatchToByte:      indexes[1],
			CompleteFilename: filename,
		})
		break
	}

	return tempSecretsFound, nil
}

// Process all the extracted signatures from config file, add severity and severity scores, finally
// store them in appropriate maps
// @parameters
// configSignatures - Extracted patterns from signature config file
func ProcessSignatures(configSignatures []core.ConfigSignature) {
var simpleContentSignatures []core.ConfigSignature
var simpleExtSignatures []core.ConfigSignature
var simpleFilenameSignatures []core.ConfigSignature
var simplePathSignatures []core.ConfigSignature
var simpleContentSignatures []string
var simpleExtSignatures []string
var simpleFilenameSignatures []string
var simplePathSignatures []string

var patternContentSignatures []core.ConfigSignature
var patternExtSignatures []core.ConfigSignature
Expand All @@ -155,15 +189,17 @@ func ProcessSignatures(configSignatures []core.ConfigSignature) {
log.Debugf("Simple Signature %s %s %s %s %d", signature.Name,
signature.Part, signature.Match, signature.Severity, signature.ID)

matchSignatureMap[signature.Part][signature.Match] = signature

switch signature.Part {
case ContentsPart:
addToSignatures(signature, &simpleContentSignatures)
simpleContentSignatures = append(simpleContentSignatures, signature.Match)
case ExtPart:
addToSignatures(signature, &simpleExtSignatures)
simpleExtSignatures = append(simpleExtSignatures, signature.Match)
case FilenamePart:
addToSignatures(signature, &simpleFilenameSignatures)
simpleFilenameSignatures = append(simpleFilenameSignatures, signature.Match)
case PathPart:
addToSignatures(signature, &simplePathSignatures)
simplePathSignatures = append(simplePathSignatures, signature.Match)
}
} else {
if signature.Severity == "" {
Expand Down Expand Up @@ -197,10 +233,10 @@ func ProcessSignatures(configSignatures []core.ConfigSignature) {

}

simpleSignatureMap[ContentsPart] = simpleContentSignatures
simpleSignatureMap[ExtPart] = simpleExtSignatures
simpleSignatureMap[FilenamePart] = simpleFilenameSignatures
simpleSignatureMap[PathPart] = simplePathSignatures
matchRegexpMap[ContentsPart] = regexp.MustCompile(strings.Join(simpleContentSignatures, "|"))
matchRegexpMap[ExtPart] = regexp.MustCompile(strings.Join(simpleExtSignatures, "|"))
matchRegexpMap[FilenamePart] = regexp.MustCompile(strings.Join(simpleFilenameSignatures, "|"))
matchRegexpMap[PathPart] = regexp.MustCompile(strings.Join(simplePathSignatures, "|"))

patternSignatureMap[ContentsPart] = patternContentSignatures
patternSignatureMap[ExtPart] = patternExtSignatures
Expand All @@ -209,7 +245,7 @@ func ProcessSignatures(configSignatures []core.ConfigSignature) {

for _, part := range []string{ContentsPart, FilenamePart, PathPart, ExtPart} {
log.Debugf("Number of Complex Patterns for matching %s: %d", part, len(patternSignatureMap[part]))
log.Debugf("Number of Simple Patterns for matching %s: %d", part, len(simpleSignatureMap[part]))
log.Debugf("Number of Simple Patterns for matching %s: %d", part, len(matchSignatureMap[part]))
}
}

Expand All @@ -221,92 +257,6 @@ func addToSignatures(signature core.ConfigSignature, Signatures *[]core.ConfigSi
*Signatures = append(*Signatures, signature)
}

// getStartOfLargeRegexMatch recovers the exact start offset of a match for a
// large regex pattern. Hyperscan reports only the end of such matches (its
// start-of-match flag does not work for large patterns), so the rule's
// compiled regex is re-run over the last MaxSecretLength bytes before `to`.
// @parameters
// sid - ID of matched rule
// from - Start index of the match as reported; used only as a fallback
// to - End index of the match
// hsIOData - Metadata containing contents being matched, filename, layerID etc.
// @returns
// int - Start index of the last regex match inside the window, or from when
// the regex finds nothing there
func getStartOfLargeRegexMatch(sid int, from, to int, hsIOData HsInputOutputData) int {
	// Search between [to-MaxSecretLength, to] to find exact match of secret
	windowStart := Max(0, to-MaxSecretLength)
	window := hsIOData.inputData[windowStart:to]

	matches := signatureIDMap[sid].CompiledRegex.FindAllIndex(window, -1)
	log.Debugf("Number of matches found for large regex pattern: %d", len(matches))

	// Only the last match is of interest, since the end of the match is known.
	if count := len(matches); count > 0 {
		return windowStart + matches[count-1][0]
	}

	// It shouldn't reach here. Return "from" as start index, by default
	return from
}

// printMatchedSignatures builds the SecretFound record for a match of rule sid
// over bytes [from, to) of the input and logs it at debug level.
//
// The record carries up to 50 bytes of context on each side of the match,
// limited to the line containing it; MatchFromByte and MatchToByte are
// relative to the start of that context buffer.
// @parameters
// sid - ID of matched rule
// from - Start index of the match
// to - End index of the match
// hsIOData - Metadata containing contents being matched, filename, layerID etc.
// @returns
// output.SecretFound - secret found
// Error - Non-nil when from/to do not describe a valid range of the input
func printMatchedSignatures(sid int, from, to int, hsIOData HsInputOutputData) (output.SecretFound, error) {
	inputData := hsIOData.inputData
	completeFilename := hsIOData.completeFilename
	layerID := hsIOData.layerID

	// Validate the offsets before any slicing: every slice expression below
	// would panic on a bad range, which previously made this error unreachable.
	if !(0 <= from && from <= to && to <= len(inputData)) {
		return output.SecretFound{}, errors.New("index out of bound while printing matched signatures")
	}

	sig := signatureIDMap[sid]
	updatedSeverity, updatedScore := calculateSeverity(inputData[from:to], sig.Severity, sig.SeverityScore)

	log.Debugf("Pattern Signature %s %s %s %s %s %s %.2f %d", sig.Name, sig.Part,
		sig.Match, sig.Regex, sig.RegexType,
		updatedSeverity, updatedScore, sig.ID)
	log.Debugf("Secret found in %s of %s within bytes %d and %d", sig.Part, completeFilename, from, to)

	start := Max(0, bytes.LastIndexByte(inputData[:from], '\n')) // Avoid -ve value from IndexByte
	end := to + Max(0, bytes.IndexByte(inputData[to:], '\n'))    // Avoid -ve value from IndexByte

	// Display max 50 bytes before and after the matching string
	start = Max(start, from-50)
	end = Min(end, to+50)

	secret := output.SecretFound{
		LayerID:               layerID,
		RuleID:                sid,
		RuleName:              sig.Name,
		PartToMatch:           sig.Part,
		Match:                 sig.Match,
		Regex:                 sig.Regex,
		Severity:              updatedSeverity,
		SeverityScore:         updatedScore,
		CompleteFilename:      completeFilename,
		PrintBufferStartIndex: start,
		MatchFromByte:         from - start,
		MatchToByte:           to - start,
		MatchedContents:       string(inputData[start:end]),
	}

	return secret, nil
}

// Update severity and score based on length of match
// @parameters
// inputMatch - Matched portion of the input
Expand Down Expand Up @@ -361,23 +311,3 @@ func Max(value_0, value_1 int) int {
}
return value_1
}

// BuildRegexes compiles the pattern signatures of every part (contents,
// filename, path, extension) by calling CompileRegexpPatterns for each.
func BuildRegexes() {
	parts := []string{ContentsPart, FilenamePart, PathPart, ExtPart}
	for idx := range parts {
		log.Debugf("Compile regexp database for %s", parts[idx])
		CompileRegexpPatterns(parts[idx])
	}
}

func CompileRegexpPatterns(part string) {
log.Debugf("Number of Complex Patterns for matching %s: %d", part, len(patternSignatureMap[part]))
for i, signature := range patternSignatureMap[part] {
log.Debugf("Pattern Signature %s %s %s %s %s %s %d",
signature.Name, signature.Part, signature.Match,
signature.Regex, signature.RegexType, signature.Severity,
signature.ID)

signature.CompiledRegex = regexp.MustCompile(signature.Regex)
patternSignatureMap[part][i] = signature
}
}

0 comments on commit 720c103

Please sign in to comment.