Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve: Parse POM from maven repo to extract and save license information in Java DB #21

Open
wants to merge 45 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
c3c9e2c
improve: [https://github.com/aquasecurity/trivy/discussions/4236] Par…
namandf Aug 7, 2023
e5e60fa
Add notes to trim license string beyond a certain limit
namandf Aug 7, 2023
eb56cc3
Remove dependency on an external module for POM parsing.
namandf Aug 8, 2023
ecab1d4
Update separator for license strings to | instead of ,
namandf Aug 8, 2023
8ff0829
go mod tidy
namandf Aug 9, 2023
6ea37a5
Deduplicate licenses fetched from POM
namandf Aug 9, 2023
239ddc8
Use license classifier for information obtained from POM
namandf Aug 9, 2023
4c23e78
Cache license information and use it on the fly while writing data to db
namandf Aug 9, 2023
302e619
Cache directory creation
namandf Aug 9, 2023
153a133
Cache directory creation
namandf Aug 9, 2023
4dbd769
Cache directory creation
namandf Aug 9, 2023
9bc1dbb
Add test cases for license classifier
namandf Aug 9, 2023
28ef2b3
sort license keys before they are written to indexes cache directory
namandf Aug 9, 2023
ff74564
update comment
namandf Aug 9, 2023
9a557a4
update comment
namandf Aug 9, 2023
e214d84
precautionary check to limit licnense strings to 30 characters
namandf Aug 9, 2023
21885b1
Update timeout for license classification
namandf Aug 9, 2023
3a4156e
update batch size
namandf Aug 9, 2023
afd7dbb
license meta variable name change
namandf Aug 9, 2023
c5b40fd
use crawler http client while creating license files
namandf Aug 9, 2023
c7fca19
Update license file creation logic
namandf Aug 9, 2023
6608af3
test
namandf Aug 9, 2023
560980f
Add timeout for prepareClassifierData
namandf Aug 9, 2023
0608b3e
improvements
namandf Aug 9, 2023
5ec049b
improvements
namandf Aug 9, 2023
2467ee3
improve processing of license classifier results
namandf Aug 10, 2023
30bc298
Remove files list from crawler struct
namandf Aug 10, 2023
d483d27
Get rid of processFilesMap
namandf Aug 10, 2023
7be6cf4
Add constant for normalized license json file name
namandf Aug 10, 2023
2f4c07c
Update test cases
namandf Aug 10, 2023
1640f0a
clean up
namandf Aug 10, 2023
512dba5
clean up
namandf Aug 10, 2023
c4108cd
clean up
namandf Aug 10, 2023
7cc48c1
Add a utils file in crawler package
namandf Aug 10, 2023
33f2473
Update comment
namandf Aug 10, 2023
b001b9f
Set classification confidence
namandf Aug 10, 2023
029dcfe
Set classification confidence
namandf Aug 10, 2023
176caa8
Add check for license string length
namandf Aug 10, 2023
87c7b41
Speed up license file generation
namandf Aug 11, 2023
a0769b5
Add comment
namandf Aug 11, 2023
d7a9bdf
reuse http client
namandf Aug 11, 2023
3f3344b
skip file copy error
namandf Aug 11, 2023
e991f76
Handle empty normalized license strings
namandf Aug 11, 2023
674534e
Add temporary log to check license strings being trimmed
namandf Aug 16, 2023
8a924f3
Increase license string length threshold to 150 characters
namandf Aug 16, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ require (
github.com/hashicorp/go-retryablehttp v0.7.2
github.com/spf13/cobra v1.6.1
github.com/stretchr/testify v1.8.1
github.com/vifraa/gopom v0.2.2
golang.org/x/net v0.0.0-20201021035429-f5854403a974
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2
k8s.io/utils v0.0.0-20230115233650-391b47cb4029
Expand All @@ -32,8 +34,8 @@ require (
github.com/rivo/uniseg v0.2.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
golang.org/x/mod v0.3.0 // indirect
golang.org/x/net v0.0.0-20201021035429-f5854403a974 // indirect
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab // indirect
golang.org/x/text v0.3.3 // indirect
golang.org/x/tools v0.0.0-20201124115921-2c860bdd6e78 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
lukechampine.com/uint128 v1.2.0 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,13 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/vifraa/gopom v0.2.2 h1:zrqoCUVIplcsETouv3xxHPvfI/WV1GUPrdX2+Diahzo=
github.com/vifraa/gopom v0.2.2/go.mod h1:oPa1dcrGrtlO37WPDBm5SqHAT+wTgF8An1Q71Z6Vv4o=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
Expand All @@ -81,6 +84,7 @@ golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab h1:2QkjZIsXupsJbJIdSjjUOgWK3aEtzyuh2mPt3l/CkeU=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
Expand Down
1 change: 1 addition & 0 deletions pkg/builder/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ func (b *Builder) Build(cacheDir string) error {
Version: ver.Version,
SHA1: ver.SHA1,
ArchiveType: index.ArchiveType,
License: ver.License,
})
}
bar.Increment()
Expand Down
50 changes: 48 additions & 2 deletions pkg/crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,20 @@ import (
"encoding/hex"
"encoding/xml"
"fmt"
"github.com/aquasecurity/trivy-java-db/pkg/fileutil"
"github.com/aquasecurity/trivy-java-db/pkg/types"
"io"
"log"
"net/http"
"path/filepath"
"strings"
"sync"

"github.com/aquasecurity/trivy-java-db/pkg/fileutil"
"github.com/aquasecurity/trivy-java-db/pkg/types"

"github.com/PuerkitoBio/goquery"
"github.com/hashicorp/go-retryablehttp"
"github.com/vifraa/gopom"
"golang.org/x/net/html/charset"
"golang.org/x/sync/semaphore"
"golang.org/x/xerrors"
)
Expand Down Expand Up @@ -176,9 +179,19 @@ func (c *Crawler) crawlSHA1(baseURL string, meta *Metadata) error {
return err
}
if len(sha1) != 0 {
// fetch licenses
pomFileName := fmt.Sprintf("/%s-%s.pom", meta.ArtifactID, version)
pomURL := baseURL + version + pomFileName
pomLicense, err := c.fetchPOMLicense(pomURL)
if err != nil {
// TODO: Check if we can change this log to a warning or ignore it
log.Println(err)
DmitriyLewen marked this conversation as resolved.
Show resolved Hide resolved
}

v := Version{
Version: version,
SHA1: sha1,
License: pomLicense,
}
versions = append(versions, v)
}
Expand Down Expand Up @@ -262,3 +275,36 @@ func (c *Crawler) fetchSHA1(url string) ([]byte, error) {
}
return sha1b, nil
}

func (c *Crawler) fetchPOMLicense(url string) (string, error) {
resp, err := c.http.Get(url)
if resp.StatusCode == http.StatusNotFound {
return "", nil
}
if err != nil {
return "", xerrors.Errorf("can't get pom xml from %s: %w", url, err)
}
defer resp.Body.Close()

var pomProject gopom.Project
DmitriyLewen marked this conversation as resolved.
Show resolved Hide resolved

decoder := xml.NewDecoder(resp.Body)
decoder.CharsetReader = charset.NewReaderLabel
err = decoder.Decode(&pomProject)

if err != nil {
return "", xerrors.Errorf("can't parse pom xml from %s: %w", url, err)
}

if len(pomProject.Licenses) == 0 {
return "", nil
}

var licenses []string
for _, l := range pomProject.Licenses {
licenses = append(licenses, l.Name)
}
DmitriyLewen marked this conversation as resolved.
Show resolved Hide resolved

// TODO: Check if we can limit the length of license string i.e trim and save
return strings.Join(licenses, ","), nil
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like we need to think about this, because some license names (e.g. "The Apache License, Version 2.0") contain commas. In that case there will be confusion if we later split the joined string by commas (e.g. "The Apache License, Version 2.0,The 2-Clause BSD License").

Copy link
Author

@namandf namandf Aug 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regarding the , separator, we can probably use |

Gave license classifier a try.

It expects a license file. In most cases it does not produce a match when given just the license name from the POM.
We essentially need to extract the license URL from the POM, fetch the contents of that URL, dump them in a file, and then run the license classifier on that file to get the right results.

It will add additional overhead. We might have to add an in-memory cache as well in order to avoid redundant processing.
In cases where the URL is missing from the POM, we'll have to rely on the license name in the POM.

Effectively a 3 step process:

  1. Fetch license URL contents and classify license
  2. If the URL is missing or the license couldn't be classified in step 1, then classify the license using the license name in the POM
  3. If step 2 also fails, then dump the license name as is (with a precautionary check to limit the license string length)
    In all cases where the license classifier is used, the confidence threshold could be 80%

Integrating this into the current design of the Java DB might be a problem, since running license classification from concurrent goroutines might not work. The classifier instantiates a backend instance, expects a list of license files, spawns goroutines to process them, and accumulates the results in the backend.
Still need to figure out the specifics.

=========

The other perspective could be to not overcomplicate things and claim support only for what's specified in pom.xml. Since the user controls the pom.xml, it's their responsibility to specify the license information accurately. We will show whatever resides in pom.xml (license.Name) with some precautionary checks.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Integrating this in the current design of java db might be a problem, since concurrent goroutines for license classification might not work. It instantiates a backend instance , expects a list of license files, spawns go routines to process them and accumulates results in the backend.

This really is a problem. We can add a new step before the crawler: parse all pom.xml files (as with the sha1 files) as you said ("Effectively a 3 step process:") and save the results to a map/file (url -> license name), then use this map/file in the crawler.
But there may be a problem with CI/CD (run time, memory, etc.).

The other perspective could be not over complicate things and claim support for whats highlighted in pom.xml. Since the user controls the pom.xml , it's their responsibility to specify the license information accurately. We will show whatever resides in pom.xml (license.Name) with some precautionary checks.

I think it is a good solution. At least we can start with this.
The Maven docs say (https://maven.apache.org/pom.html#Licenses): Using an SPDX identifier as the license name is recommended.
It is not required, but we can rely on it.

Copy link
Author

@namandf namandf Aug 9, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is really problem. We can add new step before crawler: parse all pom.xml files (as with sha1 files) as you said (Effectively a 3 step process:) and save to map/file (url -> license name). And use this map/file in crawler.
But there may be problem with CI/CD (work time, memory, etc...).

Let me give this a try.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the PR.

  1. Crawl to fetch GAV information + parse POM to extract license keys (hash of url if available , else name)
  2. Save license keys along with GAV information in the indexes cache directory and simultaneously build a map of unique license keys with meta information about license name and url
  3. Use license classifier to find license info associated with unique license keys found in step 2 and save them referenced by license key in a licenses cache directory
  4. As part of build, parse the cached data in indexes directory. Extract license keys from the same and use it to extract license metadata from the licenses cache directory.
  5. Write aggregated data to DB

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The DB size is now just 50MB larger with license information (800MB) vs. 750MB without license information.

| separated list of unique licenses :
licenses.csv

The only problem seems to be the time taken to build the Java DB. https://github.com/aquasecurity/trivy-java-db/pull/21/files#diff-526a7142f7b15a72eab215dcfa540f5b6d502951a2f9f3b34dd900013e82c675R426 seems to be a major contributor, in addition to the extra POM parsing.

I don't think it should be a problem though, since we refresh the database once a week.

Copy link
Collaborator

@DmitriyLewen DmitriyLewen Aug 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is a good way for us. And the changes of place look good. 50MB is not much.

}
6 changes: 5 additions & 1 deletion pkg/crawler/crawler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@ package crawler_test

import (
"context"
"github.com/stretchr/testify/assert"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"

"github.com/aquasecurity/trivy-java-db/pkg/crawler"
)

Expand All @@ -27,8 +28,11 @@ func TestCrawl(t *testing.T) {
"/maven2/abbot/abbot/": "testdata/abbot_abbot.html",
"/maven2/abbot/abbot/maven-metadata.xml": "testdata/maven-metadata.xml",
"/maven2/abbot/abbot/0.12.3/abbot-0.12.3.jar.sha1": "testdata/abbot-0.12.3.jar.sha1",
"/maven2/abbot/abbot/0.12.3/abbot-0.12.3.pom": "testdata/abbot-0.12.3.pom",
"/maven2/abbot/abbot/0.13.0/abbot-0.13.0.jar.sha1": "testdata/abbot-0.13.0.jar.sha1",
"/maven2/abbot/abbot/0.13.0/abbot-0.13.0.pom": "testdata/abbot-0.13.0.pom",
"/maven2/abbot/abbot/1.4.0/abbot-1.4.0.jar.sha1": "testdata/abbot-1.4.0.jar.sha1",
"/maven2/abbot/abbot/1.4.0/abbot-1.4.0.pom": "testdata/abbot-1.4.0.pom",
},
goldenPath: "testdata/golden/abbot.json",
filePath: "indexes/abbot/abbot.json",
Expand Down
1 change: 1 addition & 0 deletions pkg/crawler/testdata/abbot-0.12.3.pom
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
invalid pom
1 change: 1 addition & 0 deletions pkg/crawler/testdata/abbot-0.13.0.pom
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
invalid pom
46 changes: 46 additions & 0 deletions pkg/crawler/testdata/abbot-1.4.0.pom
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
<?xml version="1.0" encoding="UTF-8"?>
<project>
<modelVersion>4.0.0</modelVersion>
<groupId>abbot</groupId>
<artifactId>abbot</artifactId>
<version>1.4.0</version>
<name>Abbot Java GUI Test Library</name>
<description>Abbot provides a wrapper around java.awt.Robot to make testing AWT and Swing Applications easier</description>
<url>http://abbot.sf.net/</url>


<licenses>
<license>
<name>EPL</name>
<url>https://www.eclipse.org/legal/epl-v10.html</url>
</license>
</licenses>


<developers>
<developer>
<name>Gerard Davisonr</name>
<email>[email protected]</email>
<organization>Oralce</organization>
<organizationUrl>http://www.oracle.com</organizationUrl>
</developer>
</developers>


<scm>
<connection>scm:svn://svn.code.sf.net/p/abbot/svn/trunkabbot/trunk/</connection>
<developerConnection>scm:svn://svn.code.sf.net/p/abbot/svn/trunkabbot/trunk/</developerConnection>
<url>http://sourceforge.net/p/abbot/svn/HEAD/tree/abbot/trunk/</url>
</scm>

<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.8.2</version>
</dependency>
</dependencies>



</project>
9 changes: 6 additions & 3 deletions pkg/crawler/testdata/golden/abbot.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,18 @@
"Versions": [
{
"Version": "0.12.3",
"SHA1": "UdKKJ9kZzoaQpA9PM1udWRzrFuk="
"SHA1": "UdKKJ9kZzoaQpA9PM1udWRzrFuk=",
"License": ""
},
{
"Version": "0.13.0",
"SHA1": "WW2R5nYxsN6wX7aF2NG2c18+T2A="
"SHA1": "WW2R5nYxsN6wX7aF2NG2c18+T2A=",
"License": ""
},
{
"Version": "1.4.0",
"SHA1": "ojY2RqndBZVWM7RQAQtZohr4pCM="
"SHA1": "ojY2RqndBZVWM7RQAQtZohr4pCM=",
"License": "EPL"
}
],
"ArchiveType": "jar"
Expand Down
1 change: 1 addition & 0 deletions pkg/crawler/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ type Index struct {
// Version describes a single crawled version of an artifact.
// crawlSHA1 builds one of these for every version whose SHA1 is non-empty.
type Version struct {
// Version is the version string of the artifact (e.g. "1.4.0").
Version string
// SHA1 is the digest fetched for this version, stored as raw bytes.
SHA1 []byte
// License is the license string extracted from this version's POM.
// It is empty when the POM is missing, cannot be parsed, or declares no licenses.
License string
}
20 changes: 10 additions & 10 deletions pkg/db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func (db *DB) Init() error {
if _, err := db.client.Exec("CREATE TABLE artifacts(id INTEGER PRIMARY KEY, group_id TEXT, artifact_id TEXT)"); err != nil {
return xerrors.Errorf("unable to create 'artifacts' table: %w", err)
}
if _, err := db.client.Exec("CREATE TABLE indices(artifact_id INTEGER, version TEXT, sha1 BLOB, archive_type TEXT, foreign key (artifact_id) references artifacts(id))"); err != nil {
if _, err := db.client.Exec("CREATE TABLE indices(artifact_id INTEGER, version TEXT, sha1 BLOB, archive_type TEXT, license TEXT, foreign key (artifact_id) references artifacts(id))"); err != nil {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like we should create a license table and store foreign keys here.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could have done that, but it would require 2 tables: one to track licenses and the other to maintain a mapping from indices logical id to license logical id, since it isn't a 1-1 mapping.

Querying the data would involve JOINs, which would mean a change in all queries. Moreover, the effort involved didn't seem to reap proportionate benefits, since the DB size with the current change is just 50MB more than normal and the queries are simpler.

The intent was to keep the changes to existing code as minimal as possible.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could have done that but it would require 2 tables. One to track licenses and the other to maintain a mapping of indices logical id to license logical id since it isn't a 1-1 mapping.

It is an RDBMS. I don't think this change would be complex.

Copy link
Author

@namandf namandf Sep 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. Since there is no use case of filtering by license strings, I thought normalization can be deferred and queries can be kept simpler where a direct access to the row in the indices table will have all required info.

It does affect the database size since information is replicated, but it didn't seem to have a major impact hence decided to stick to it.

But I agree, in the long run it might blow up and cause issues w.r.t database size. Don't foresee query performance issues since indexes will help efficiently access the artifact metadata. Will let you guys take a call.

return xerrors.Errorf("unable to create 'indices' table: %w", err)
}

Expand Down Expand Up @@ -110,13 +110,13 @@ func (db *DB) InsertIndexes(indexes []types.Index) error {

for _, index := range indexes {
_, err = tx.Exec(`
INSERT INTO indices(artifact_id, version, sha1, archive_type)
INSERT INTO indices(artifact_id, version, sha1, archive_type, license)
VALUES (
(SELECT id FROM artifacts
WHERE group_id=? AND artifact_id=?),
?, ?, ?
?, ?, ?, ?
) ON CONFLICT(sha1) DO NOTHING`,
index.GroupID, index.ArtifactID, index.Version, index.SHA1, index.ArchiveType)
index.GroupID, index.ArtifactID, index.Version, index.SHA1, index.ArchiveType, index.License)
if err != nil {
return xerrors.Errorf("unable to insert to 'indices' table: %w", err)
}
Expand Down Expand Up @@ -147,12 +147,12 @@ func (db *DB) SelectIndexBySha1(sha1 string) (types.Index, error) {
return index, xerrors.Errorf("sha1 decode error: %w", err)
}
row := db.client.QueryRow(`
SELECT a.group_id, a.artifact_id, i.version, i.sha1, i.archive_type
SELECT a.group_id, a.artifact_id, i.version, i.sha1, i.archive_type, i.license
FROM indices i
JOIN artifacts a ON a.id = i.artifact_id
WHERE i.sha1 = ?`,
sha1b)
err = row.Scan(&index.GroupID, &index.ArtifactID, &index.Version, &index.SHA1, &index.ArchiveType)
err = row.Scan(&index.GroupID, &index.ArtifactID, &index.Version, &index.SHA1, &index.ArchiveType, &index.License)
if err != nil && !errors.Is(err, sql.ErrNoRows) {
return index, xerrors.Errorf("select index error: %w", err)
}
Expand All @@ -162,12 +162,12 @@ func (db *DB) SelectIndexBySha1(sha1 string) (types.Index, error) {
func (db *DB) SelectIndexByArtifactIDAndGroupID(artifactID, groupID string) (types.Index, error) {
var index types.Index
row := db.client.QueryRow(`
SELECT a.group_id, a.artifact_id, i.version, i.sha1, i.archive_type
SELECT a.group_id, a.artifact_id, i.version, i.sha1, i.archive_type, i.license
FROM indices i
JOIN artifacts a ON a.id = i.artifact_id
WHERE a.group_id = ? AND a.artifact_id = ?`,
groupID, artifactID)
err := row.Scan(&index.GroupID, &index.ArtifactID, &index.Version, &index.SHA1, &index.ArchiveType)
err := row.Scan(&index.GroupID, &index.ArtifactID, &index.Version, &index.SHA1, &index.ArchiveType, &index.License)
if err != nil && !errors.Is(err, sql.ErrNoRows) {
return index, xerrors.Errorf("select index error: %w", err)
}
Expand All @@ -178,7 +178,7 @@ func (db *DB) SelectIndexesByArtifactIDAndFileType(artifactID string, fileType t
error) {
var indexes []types.Index
rows, err := db.client.Query(`
SELECT a.group_id, a.artifact_id, i.version, i.sha1, i.archive_type
SELECT a.group_id, a.artifact_id, i.version, i.sha1, i.archive_type, i.license
FROM indices i
JOIN artifacts a ON a.id = i.artifact_id
WHERE a.artifact_id = ? AND i.archive_type = ?`,
Expand All @@ -188,7 +188,7 @@ func (db *DB) SelectIndexesByArtifactIDAndFileType(artifactID string, fileType t
}
for rows.Next() {
var index types.Index
if err = rows.Scan(&index.GroupID, &index.ArtifactID, &index.Version, &index.SHA1, &index.ArchiveType); err != nil {
if err = rows.Scan(&index.GroupID, &index.ArtifactID, &index.Version, &index.SHA1, &index.ArchiveType, &index.License); err != nil {
return nil, xerrors.Errorf("scan row error: %w", err)
}
indexes = append(indexes, index)
Expand Down
33 changes: 33 additions & 0 deletions pkg/db/db_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
var (
jstlSha1b, _ = hex.DecodeString("9c581de633e94be1e7a955bd4e8292f16e554387")
javaxServletSha1b, _ = hex.DecodeString("bca201e52333629c59e459e874e5ecd8f9899e15")
junitSHA, _ = hex.DecodeString("1013627e3993319870863a020034004717505815")
indexJstl = types.Index{
GroupID: "jstl",
ArtifactID: "jstl",
Expand All @@ -31,6 +32,14 @@ var (
SHA1: javaxServletSha1b,
ArchiveType: types.JarType,
}
indexJunit = types.Index{
GroupID: "junit",
ArtifactID: "junit",
Version: "4.9",
SHA1: junitSHA,
ArchiveType: types.JarType,
License: "Common Public License Version 1.0",
}
)

func TestSelectIndexBySha1(t *testing.T) {
Expand All @@ -52,12 +61,19 @@ func TestSelectIndexBySha1(t *testing.T) {
want: types.Index{},
assertErr: assert.NoError,
},
{
name: "index with license using sha",
sha1: "1013627e3993319870863a020034004717505815",
want: indexJunit,
assertErr: assert.NoError,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
dbc, err := dbtest.InitDB(t, []types.Index{
indexJstl,
indexJavaxServlet,
indexJunit,
})
require.NoError(t, err)

Expand Down Expand Up @@ -97,12 +113,20 @@ func TestSelectIndexByArtifactIDAndGroupID(t *testing.T) {
want: types.Index{},
assertErr: assert.NoError,
},
{
name: "index with license using groupid and artifactid",
groupID: "junit",
artifactID: "junit",
want: indexJunit,
assertErr: assert.NoError,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
dbc, err := dbtest.InitDB(t, []types.Index{
indexJstl,
indexJavaxServlet,
indexJunit,
})
require.NoError(t, err)

Expand Down Expand Up @@ -139,12 +163,21 @@ func TestSelectIndexesByArtifactIDAndFileType(t *testing.T) {
artifactID: "jstl",
archiveType: "wrong",
},
{
name: "index with license using artifactid and archivetype",
artifactID: "junit",
archiveType: types.JarType,
want: []types.Index{
indexJunit,
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
dbc, err := dbtest.InitDB(t, []types.Index{
indexJstl,
indexJavaxServlet,
indexJunit,
})
require.NoError(t, err)

Expand Down
Loading