Skip to content

Commit

Permalink
✨ add dependency location resolver and get code locations for XML, JS…
Browse files Browse the repository at this point in the history
…ON files (#412)

Fixes #410 
Fixes #254 

There were two choices to do this: 

1. Add a new optional field lineNumber to the Dep type, and let the
providers populate it at the time of dependency discovery. This is
straightforward and probably preferred, but requires a change in the
output API.
2. Add a new interface like Snipper, that providers can implement for
deps to get locations for a certain dependency.

I chose No. 2 because it does not need an output API change. But I am not
a fan of it, as it is complicated and carries a perf penalty. Asking for
your input on the approach. If we do go with 2, I think we should still
remove it in later versions in favor of a new optional field on Dep. Even
if we did add it now, it would be non-intrusive.

---------

Signed-off-by: Pranav Gaikwad <[email protected]>
  • Loading branch information
pranavgaikwad authored Nov 16, 2023
1 parent 740142e commit 39ed5f5
Show file tree
Hide file tree
Showing 13 changed files with 1,834 additions and 53 deletions.
400 changes: 400 additions & 0 deletions demo-dep-output.yaml

Large diffs are not rendered by default.

84 changes: 84 additions & 0 deletions demo-output.yaml

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions provider/internal/builtin/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ func (p *builtinProvider) Init(ctx context.Context, log logr.Logger, config prov
config: config,
tags: p.tags,
UnimplementedDependenciesComponent: provider.UnimplementedDependenciesComponent{},
locationCache: make(map[string]float64),
log: log,
}, nil
}

Expand Down
114 changes: 101 additions & 13 deletions provider/internal/builtin/service_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,14 @@ import (
"regexp"
"strconv"
"strings"
"sync"

"github.com/antchfx/jsonquery"
"github.com/antchfx/xmlquery"
"github.com/antchfx/xpath"
"github.com/go-logr/logr"
"github.com/konveyor/analyzer-lsp/provider"
"github.com/konveyor/analyzer-lsp/tracing"
"go.lsp.dev/uri"
"gopkg.in/yaml.v2"
)
Expand All @@ -23,13 +26,15 @@ type builtinServiceClient struct {
config provider.InitConfig
tags map[string]bool
provider.UnimplementedDependenciesComponent
log logr.Logger

cacheMutex sync.RWMutex
locationCache map[string]float64
}

var _ provider.ServiceClient = &builtinServiceClient{}

func (p *builtinServiceClient) Stop() {
return
}
func (p *builtinServiceClient) Stop() {}

func (p *builtinServiceClient) Evaluate(ctx context.Context, cap string, conditionInfo []byte) (provider.ProviderEvaluateResponse, error) {
var cond builtinCondition
Expand Down Expand Up @@ -119,7 +124,7 @@ func (p *builtinServiceClient) Evaluate(ctx context.Context, cap string, conditi
}
lineNumber, err := strconv.Atoi(pieces[1])
if err != nil {
return response, fmt.Errorf("Cannot convert line number string to integer")
return response, fmt.Errorf("cannot convert line number string to integer")
}
response.Incidents = append(response.Incidents, provider.IncidentContext{
FileURI: uri.File(ab),
Expand All @@ -140,18 +145,17 @@ func (p *builtinServiceClient) Evaluate(ctx context.Context, cap string, conditi
case "xml":
query, err := xpath.CompileWithNS(cond.XML.XPath, cond.XML.Namespaces)
if query == nil || err != nil {
return response, fmt.Errorf("Could not parse provided xpath query '%s': %v", cond.XML.XPath, err)
return response, fmt.Errorf("could not parse provided xpath query '%s': %v", cond.XML.XPath, err)
}
//TODO(fabianvf): how should we scope the files searched here?
var xmlFiles []string
patterns := []string{"*.xml", "*.xhtml"}
xmlFiles, err = provider.GetFiles(p.config.Location, cond.XML.Filepaths, patterns...)
if err != nil {
return response, fmt.Errorf("Unable to find files using pattern `%s`: %v", patterns, err)
return response, fmt.Errorf("unable to find files using pattern `%s`: %v", patterns, err)
}

for _, file := range xmlFiles {

f, err := os.Open(file)
if err != nil {
fmt.Printf("unable to open file '%s': %v\n", file, err)
Expand Down Expand Up @@ -187,14 +191,21 @@ func (p *builtinServiceClient) Evaluate(ctx context.Context, cap string, conditi
if err != nil {
ab = file
}
response.Incidents = append(response.Incidents, provider.IncidentContext{
incident := provider.IncidentContext{
FileURI: uri.File(ab),
Variables: map[string]interface{}{
"matchingXML": node.OutputXML(false),
"innerText": node.InnerText(),
"data": node.Data,
},
})
}
location, err := p.getLocation(ctx, ab, node.InnerText())
if err == nil {
incident.CodeLocation = &location
lineNo := int(location.StartPosition.Line)
incident.LineNumber = &lineNo
}
response.Incidents = append(response.Incidents, incident)
}
}
}
Expand All @@ -203,16 +214,24 @@ func (p *builtinServiceClient) Evaluate(ctx context.Context, cap string, conditi
case "json":
query := cond.JSON.XPath
if query == "" {
return response, fmt.Errorf("Could not parse provided xpath query as string: %v", conditionInfo)
return response, fmt.Errorf("could not parse provided xpath query as string: %v", conditionInfo)
}
pattern := "*.json"
jsonFiles, err := provider.GetFiles(p.config.Location, cond.JSON.Filepaths, pattern)
if err != nil {
return response, fmt.Errorf("Unable to find files using pattern `%s`: %v", pattern, err)
return response, fmt.Errorf("unable to find files using pattern `%s`: %v", pattern, err)
}
for _, file := range jsonFiles {
f, err := os.Open(file)
if err != nil {
p.log.V(5).Error(err, "error opening json file", "file", file)
continue
}
doc, err := jsonquery.Parse(f)
if err != nil {
p.log.V(5).Error(err, "error parsing json file", "file", file)
continue
}
list, err := jsonquery.QueryAll(doc, query)
if err != nil {
return response, err
Expand All @@ -224,13 +243,20 @@ func (p *builtinServiceClient) Evaluate(ctx context.Context, cap string, conditi
if err != nil {
ab = file
}
response.Incidents = append(response.Incidents, provider.IncidentContext{
incident := provider.IncidentContext{
FileURI: uri.File(ab),
Variables: map[string]interface{}{
"matchingJSON": node.InnerText(),
"data": node.Data,
},
})
}
location, err := p.getLocation(ctx, ab, node.InnerText())
if err == nil {
incident.CodeLocation = &location
lineNo := int(location.StartPosition.Line)
incident.LineNumber = &lineNo
}
response.Incidents = append(response.Incidents, incident)
}
}
}
Expand Down Expand Up @@ -258,6 +284,68 @@ func (p *builtinServiceClient) Evaluate(ctx context.Context, cap string, conditi
return response, fmt.Errorf("capability must be one of %v, not %s", capabilities, cap)
}
}

// getLocation attempts to get the code location of the given content in a
// JSON / XML file.
//
// Only the first few lines of content are used to build a search pattern:
// each line is stripped of surrounding spaces and of all tabs, regex-escaped,
// and the non-empty results are joined with lazy wildcards. The pattern is
// handed to provider.MultilineGrep and the line number it reports is used for
// both the start and the end position of the returned location.
//
// Results, including failures, are cached per (path, pattern) pair so that a
// repeated lookup does not search the file again.
//
// On any failure a zero-valued provider.Location and a non-nil error are
// returned.
func (b *builtinServiceClient) getLocation(ctx context.Context, path, content string) (provider.Location, error) {
	// maxSearchLines caps how many lines of content take part in the search.
	const maxSearchLines = 5
	// notFound is the sentinel stored in the cache for failed lookups.
	const notFound = float64(-1)

	ctx, span := tracing.StartNewSpan(ctx, "getLocation")
	defer span.End()

	// strings.Split always yields at least one element, so only the upper
	// bound needs to be enforced here; empty content is rejected below once
	// blank lines have been dropped.
	parts := strings.Split(content, "\n")
	if len(parts) > maxSearchLines {
		parts = parts[:maxSearchLines]
	}
	lines := make([]string, 0, len(parts))
	for _, part := range parts {
		line := strings.Trim(part, " ")
		line = strings.ReplaceAll(line, "\t", "")
		line = regexp.QuoteMeta(line)
		if line != "" {
			lines = append(lines, line)
		}
	}
	if len(lines) < 1 {
		return provider.Location{}, fmt.Errorf("unable to get code location, no-op content")
	}
	pattern := fmt.Sprintf(".*?%s", strings.Join(lines, ".*?"))

	cacheKey := fmt.Sprintf("%s-%s", path, pattern)
	b.cacheMutex.RLock()
	val, exists := b.locationCache[cacheKey]
	b.cacheMutex.RUnlock()
	if exists {
		if val == notFound {
			return provider.Location{}, fmt.Errorf("unable to get location due to a previous error")
		}
		return provider.Location{
			StartPosition: provider.Position{Line: val},
			EndPosition:   provider.Position{Line: val},
		}, nil
	}

	// Whatever happens below, remember the outcome. foundLine stays at the
	// sentinel unless the search succeeds.
	foundLine := notFound
	defer func() {
		b.cacheMutex.Lock()
		b.locationCache[cacheKey] = foundLine
		b.cacheMutex.Unlock()
	}()

	lineNumber, err := provider.MultilineGrep(ctx, len(lines), path, pattern)
	if err != nil {
		return provider.Location{}, fmt.Errorf("unable to get location in file %s - %w", path, err)
	}
	if lineNumber == -1 {
		// No underlying error to wrap here; formatting a nil error with %w
		// would render as "%!w(<nil>)".
		return provider.Location{}, fmt.Errorf("unable to get location in file %s - content not found", path)
	}

	foundLine = float64(lineNumber)
	return provider.Location{
		StartPosition: provider.Position{Line: foundLine},
		EndPosition:   provider.Position{Line: foundLine},
	}, nil
}

func findFilesMatchingPattern(root, pattern string) ([]string, error) {
var regex *regexp.Regexp
// if the regex doesn't compile, we'll default to using filepath.Match on the pattern directly
Expand Down
42 changes: 34 additions & 8 deletions provider/internal/java/dependency.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ const (
providerSpecificConfigExcludePackagesKey = "excludePackages"
)

// keys used in dep.Extras for extra information about a dep
const (
// artifactIdKey is the Extras key under which the dep's artifact ID is stored
artifactIdKey = "artifactId"
// groupIdKey is the Extras key under which the dep's group ID is stored
groupIdKey = "groupId"
// pomPathKey is the Extras key under which the path of the pom the dep was
// resolved against is stored
pomPathKey = "pomPath"
)

// TODO implement this for real
func (p *javaServiceClient) findPom() string {
var depPath string
Expand All @@ -44,9 +51,13 @@ func (p *javaServiceClient) findPom() string {
}

func (p *javaServiceClient) GetDependencies(ctx context.Context) (map[uri.URI][]*provider.Dep, error) {
if p.depsCache != nil {
return p.depsCache, nil
p.depsMutex.RLock()
val := p.depsCache
p.depsMutex.RUnlock()
if val != nil {
return val, nil
}

var err error
var ll map[uri.URI][]konveyor.DepDAGItem
m := map[uri.URI][]*provider.Dep{}
Expand Down Expand Up @@ -74,7 +85,9 @@ func (p *javaServiceClient) GetDependencies(ctx context.Context) (map[uri.URI][]
}
m[f] = deps
}
p.depsMutex.Lock()
p.depsCache = m
p.depsMutex.Unlock()
return m, nil
}

Expand Down Expand Up @@ -140,6 +153,11 @@ func (p *javaServiceClient) GetDependenciesFallback(ctx context.Context, locatio
}
dep := provider.Dep{}
dep.Name = fmt.Sprintf("%s.%s", *d.GroupID, *d.ArtifactID)
dep.Extras = map[string]interface{}{
groupIdKey: *d.GroupID,
artifactIdKey: *d.ArtifactID,
pomPathKey: path,
}
if *d.Version != "" {
if strings.Contains(*d.Version, "$") {
version := strings.TrimSuffix(strings.TrimPrefix(*d.Version, "${"), "}")
Expand Down Expand Up @@ -171,7 +189,9 @@ func (p *javaServiceClient) GetDependenciesFallback(ctx context.Context, locatio

m := map[uri.URI][]*provider.Dep{}
m[uri.File(path)] = deps
p.depsMutex.Lock()
p.depsCache = m
p.depsMutex.Unlock()

// recursively find deps in submodules
if pom.Modules != nil {
Expand Down Expand Up @@ -229,7 +249,7 @@ func (p *javaServiceClient) GetDependenciesDAG(ctx context.Context) (map[uri.URI

var pomDeps []provider.DepDAGItem
for _, tree := range submoduleTrees {
submoduleDeps, err := p.parseMavenDepLines(tree, localRepoPath)
submoduleDeps, err := p.parseMavenDepLines(tree, localRepoPath, path)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -349,7 +369,7 @@ func (w *walker) walkDirForJar(path string, info fs.DirEntry, err error) error {

// parseDepString parses a java dependency string
// assumes format <group>:<name>:<type>:<version>:<scope>
func (p *javaServiceClient) parseDepString(dep, localRepoPath string) (provider.Dep, error) {
func (p *javaServiceClient) parseDepString(dep, localRepoPath, pomPath string) (provider.Dep, error) {
d := provider.Dep{}
// remove all the pretty print characters.
dep = strings.TrimFunc(dep, func(r rune) bool {
Expand Down Expand Up @@ -385,6 +405,12 @@ func (p *javaServiceClient) parseDepString(dep, localRepoPath string) (provider.
d.Labels = addDepLabels(p.depToLabels, d.Name)
d.FileURIPrefix = fmt.Sprintf("file://%v", filepath.Dir(fp))

d.Extras = map[string]interface{}{
groupIdKey: parts[0],
artifactIdKey: parts[1],
pomPathKey: pomPath,
}

return d, nil
}

Expand Down Expand Up @@ -412,10 +438,10 @@ func addDepLabels(depToLabels map[string]*depLabelItem, depName string) []string
}

// parseMavenDepLines recursively parses output lines from maven dependency tree
func (p *javaServiceClient) parseMavenDepLines(lines []string, localRepoPath string) ([]provider.DepDAGItem, error) {
func (p *javaServiceClient) parseMavenDepLines(lines []string, localRepoPath, pomPath string) ([]provider.DepDAGItem, error) {
if len(lines) > 0 {
baseDepString := lines[0]
baseDep, err := p.parseDepString(baseDepString, localRepoPath)
baseDep, err := p.parseDepString(baseDepString, localRepoPath, pomPath)
if err != nil {
return nil, err
}
Expand All @@ -425,15 +451,15 @@ func (p *javaServiceClient) parseMavenDepLines(lines []string, localRepoPath str
idx := 1
// indirect deps are separated by 3 or more spaces after the direct dep
for idx < len(lines) && strings.Count(lines[idx], " ") > 2 {
transitiveDep, err := p.parseDepString(lines[idx], localRepoPath)
transitiveDep, err := p.parseDepString(lines[idx], localRepoPath, pomPath)
if err != nil {
return nil, err
}
transitiveDep.Indirect = true
item.AddedDeps = append(item.AddedDeps, provider.DepDAGItem{Dep: transitiveDep})
idx += 1
}
ds, err := p.parseMavenDepLines(lines[idx:], localRepoPath)
ds, err := p.parseMavenDepLines(lines[idx:], localRepoPath, pomPath)
if err != nil {
return nil, err
}
Expand Down
Loading

0 comments on commit 39ed5f5

Please sign in to comment.