From e151eb5c04bef0f563f22d682aeaf3b30fe9bf8d Mon Sep 17 00:00:00 2001 From: Shawn Hurley Date: Fri, 1 Dec 2023 13:16:49 -0500 Subject: [PATCH] :bug: using golang for filecontent searching" (#443) Reverts konveyor/analyzer-lsp#430 Signed-off-by: Shawn Hurley --- provider/internal/builtin/service_client.go | 127 ++++++-------------- rule-example.yaml | 4 +- 2 files changed, 36 insertions(+), 95 deletions(-) diff --git a/provider/internal/builtin/service_client.go b/provider/internal/builtin/service_client.go index 8e476b73..77be39da 100644 --- a/provider/internal/builtin/service_client.go +++ b/provider/internal/builtin/service_client.go @@ -1,13 +1,14 @@ package builtin import ( - "bufio" "context" "fmt" "io/fs" "os" + "os/exec" "path/filepath" "regexp" + "strconv" "strings" "sync" @@ -15,7 +16,6 @@ import ( "github.com/antchfx/xmlquery" "github.com/antchfx/xpath" "github.com/go-logr/logr" - "github.com/konveyor/analyzer-lsp/lsp/protocol" "github.com/konveyor/analyzer-lsp/provider" "github.com/konveyor/analyzer-lsp/tracing" "go.lsp.dev/uri" @@ -84,30 +84,53 @@ func (p *builtinServiceClient) Evaluate(ctx context.Context, cap string, conditi if c.Pattern == "" { return response, fmt.Errorf("could not parse provided regex pattern as string: %v", conditionInfo) } - patternRegex, err := regexp.Compile(c.Pattern) + var outputBytes []byte + grep := exec.Command("grep", "-o", "-n", "-R", "-P", c.Pattern, p.config.Location) + outputBytes, err := grep.Output() if err != nil { - return response, err + if exitError, ok := err.(*exec.ExitError); ok && exitError.ExitCode() == 1 { + return response, nil + } + return response, fmt.Errorf("could not run grep with provided pattern %+v", err) } - matches, err := parallelWalk(p.config.Location, patternRegex) - if err != nil { - return response, err + + matches := []string{} + outputString := strings.TrimSpace(string(outputBytes)) + if outputString != "" { + matches = append(matches, strings.Split(outputString, "\n")...) } for _, match := range matches { //TODO(fabianvf): This will not work if there is a `:` in the filename, do we care? - containsFile, err := provider.FilterFilePattern(c.FilePattern, match.positionParams.TextDocument.URI) + pieces := strings.SplitN(match, ":", 3) + if len(pieces) != 3 { + //TODO(fabianvf): Just log or return? + //(shawn-hurley): I think the return is good personally + return response, fmt.Errorf( + "malformed response from grep, cannot parse grep output '%s' with pattern {filepath}:{lineNumber}:{matchingText}", match) + } + + containsFile, err := provider.FilterFilePattern(c.FilePattern, pieces[0]) if err != nil { return response, err } if !containsFile { continue } - lineNumber := int(match.positionParams.Position.Line) + + ab, err := filepath.Abs(pieces[0]) + if err != nil { + ab = pieces[0] + } + lineNumber, err := strconv.Atoi(pieces[1]) + if err != nil { + return response, fmt.Errorf("cannot convert line number string to integer") + } response.Incidents = append(response.Incidents, provider.IncidentContext{ - FileURI: uri.URI(match.positionParams.TextDocument.URI), + FileURI: uri.File(ab), LineNumber: &lineNumber, Variables: map[string]interface{}{ - "matchingText": match.match, + "matchingText": pieces[2], }, CodeLocation: &provider.Location{ StartPosition: provider.Position{Line: float64(lineNumber)}, @@ -349,85 +372,3 @@ func findFilesMatchingPattern(root, pattern string) ([]string, error) { }) return matches, err } - -type walkResult struct { - positionParams protocol.TextDocumentPositionParams - match string -} - -func parallelWalk(location string, regex *regexp.Regexp) ([]walkResult, error) { - var positions []walkResult - positionsChan := make(chan walkResult) - wg := &sync.WaitGroup{} - - go func() { - err := filepath.Walk(location, func(path string, f os.FileInfo, err error) error { - if err != nil { - return err - } - - if f.Mode().IsRegular() { - wg.Add(1) - go processFile(path, regex, positionsChan, wg) - } - - return nil - }) - - if err != nil { - return - } - - wg.Wait() - close(positionsChan) - }() - - for pos := range positionsChan { - positions = append(positions, pos) - } - - return positions, nil -} - -func processFile(path string, regex *regexp.Regexp, positionsChan chan<- walkResult, wg *sync.WaitGroup) { - defer wg.Done() - - content, err := os.ReadFile(path) - if err != nil { - return - } - - // Must go through each line, - forceFullFileScan := false - if strings.Contains(regex.String(), "^") { - forceFullFileScan = true - } - - if regex.Match(content) || forceFullFileScan { - scanner := bufio.NewScanner(strings.NewReader(string(content))) - lineNumber := 1 - for scanner.Scan() { - matchLocations := regex.FindAllStringIndex(scanner.Text(), -1) - matchStrings := regex.FindAllString(scanner.Text(), -1) - for i, loc := range matchLocations { - absPath, err := filepath.Abs(path) - if err != nil { - return - } - positionsChan <- walkResult{ - positionParams: protocol.TextDocumentPositionParams{ - TextDocument: protocol.TextDocumentIdentifier{ - URI: fmt.Sprintf("file://%s", absPath), - }, - Position: protocol.Position{ - Line: uint32(lineNumber), - Character: uint32(loc[1]), - }, - }, - match: matchStrings[i], - } - } - lineNumber++ - } - } -} diff --git a/rule-example.yaml b/rule-example.yaml index 1ce511a1..81c957e1 100644 --- a/rule-example.yaml +++ b/rule-example.yaml @@ -184,7 +184,7 @@ ruleID: filecontent-codesnip-test when: builtin.filecontent: - pattern: "^.*openjdk-11.*" + pattern: "^FROM.*openjdk-11.*" filePattern: "Dockerfile" - message: python sample rule 001 @@ -201,4 +201,4 @@ ruleID: python-sample-rule-003 when: python.referenced: - pattern: "create_custom_resource_definition" + pattern: "create_custom_resource_definition" \ No newline at end of file