Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(license): improve license normalization #7131

Merged
merged 28 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
44b0164
(feat)Improve license normalization
pbaumard Jul 9, 2024
5adc524
Fix indenting
pbaumard Jul 9, 2024
6b52af5
Fix format and test
pbaumard Jul 10, 2024
4dc5070
mapping back to private and other suffixes ignore from SPDX
pbaumard Jul 11, 2024
60d29b0
Do not change unmapped license name
pbaumard Jul 12, 2024
ab62262
Additional mappings from oss-review-toolkit and space normalization
pbaumard Jul 12, 2024
d0101fc
Fix double space
pbaumard Jul 12, 2024
cd7d779
Add back GPL WITH AUTOCONF EXCEPTION
pbaumard Jul 12, 2024
aa7ee3c
Normalize CycloneDX licenses and fix tests
pbaumard Jul 15, 2024
c21e723
Add MPL mappings
pbaumard Jul 16, 2024
3055e10
Update golden test files
pbaumard Jul 16, 2024
6c168f0
Normalize suffixes using existing expression package
pbaumard Jul 16, 2024
6ec91a5
Fix copyright_test
pbaumard Jul 16, 2024
0f4c8cb
Add ValidateDocument in SPDX test
pbaumard Jul 17, 2024
907683d
Fix linter
pbaumard Jul 17, 2024
5074f24
Revert since only Name and Version are checked
pbaumard Jul 24, 2024
8e6f9d3
Revert cycloneDX normalization
pbaumard Jul 24, 2024
b8bb817
More tests
pbaumard Jul 24, 2024
56093d9
Comments
pbaumard Jul 24, 2024
85b8781
Fix golden
pbaumard Jul 24, 2024
39f796e
test(integration): update golden files
DmitriyLewen Jul 25, 2024
5f7cf6d
Fix apk license parsing and use version regexp for license normalization
pbaumard Jul 26, 2024
aff6ebd
Check map in test; make map directly & move TestLaxSplitLicense to no…
pbaumard Aug 1, 2024
0134266
Add field names in test
pbaumard Sep 2, 2024
b85b781
Merge branch 'aquasecurity:main' into feature/better-license-normalize
pbaumard Sep 3, 2024
8663dcd
Golden test update
pbaumard Sep 3, 2024
932e937
Merge remote-tracking branch 'origin/main' into feature/better-licens…
pbaumard Sep 10, 2024
f3f7c39
test: add a file for private functions
knqyf263 Sep 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 54 additions & 55 deletions pkg/licensing/normalize.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"strings"
)

var mapping = map[string]string{
var Mapping = map[string]string{
// GPL
"GPL-1": GPL10,
"GPL-1+": GPL10,
Expand All @@ -23,7 +23,8 @@ var mapping = map[string]string{
"GPL-2.0+": GPL20,
"GPL-2.0-OR-LATER": GPL20,
"GPL-2+ WITH AUTOCONF EXCEPTION": GPL20withautoconfexception,
"GPL-2+-with-bison-exception": GPL20withbisonexception,
"GPL-2+-WITH-BISON-EXCEPTION": GPL20withbisonexception,
"GPL2 W/ CPE": GPL20withclasspathexception,
"GPL3": GPL30,
"GPL 3.0": GPL30,
"GPL 3": GPL30,
Expand Down Expand Up @@ -67,65 +68,56 @@ var mapping = map[string]string{
"MPL 2": MPL20,

// BSD
"BSD": BSD3Clause, // 2? 3?
"BSD-2-CLAUSE": BSD2Clause,
"BSD-3-CLAUSE": BSD3Clause,
"BSD-4-CLAUSE": BSD4Clause,
"BSD 2 CLAUSE": BSD2Clause,
"BSD 2-CLAUSE": BSD2Clause,
"BSD 2-CLAUSE LICENSE": BSD2Clause,
"THE BSD 2-CLAUSE LICENSE": BSD2Clause,
"THE 2-CLAUSE BSD LICENSE": BSD2Clause,
"TWO-CLAUSE BSD-STYLE LICENSE": BSD2Clause,
"BSD 3 CLAUSE": BSD3Clause,
"BSD 3-CLAUSE": BSD3Clause,
"BSD 3-CLAUSE LICENSE": BSD3Clause,
"THE BSD 3-CLAUSE LICENSE": BSD3Clause,
"BSD": BSD3Clause, // 2? 3?
"BSD-2-CLAUSE": BSD2Clause,
"BSD-3-CLAUSE": BSD3Clause,
"BSD-4-CLAUSE": BSD4Clause,
"BSD 2 CLAUSE": BSD2Clause,
"BSD 2-CLAUSE": BSD2Clause,
"2-CLAUSE BSD": BSD2Clause,
"TWO-CLAUSE BSD-STYLE": BSD2Clause,
"BSD 3 CLAUSE": BSD3Clause,
"BSD 3-CLAUSE": BSD3Clause,
"BSD 3-CLAUSE \"NEW\" OR \"REVISED\" LICENSE (BSD-3-CLAUSE)": BSD3Clause,
"ECLIPSE DISTRIBUTION LICENSE (NEW BSD LICENSE)": BSD3Clause,
"NEW BSD LICENSE": BSD3Clause,
"MODIFIED BSD LICENSE": BSD3Clause,
"REVISED BSD": BSD3Clause,
"REVISED BSD LICENSE": BSD3Clause,
"THE NEW BSD LICENSE": BSD3Clause,
"3-CLAUSE BSD LICENSE": BSD3Clause,
"BSD 3-CLAUSE NEW LICENSE": BSD3Clause,
"BSD LICENSE": BSD3Clause,
"NEW BSD": BSD3Clause,
"MODIFIED BSD": BSD3Clause,
"REVISED BSD": BSD3Clause,
"3-CLAUSE BSD": BSD3Clause,
"BSD 3-CLAUSE NEW": BSD3Clause,
"BSD LICENSE 3": BSD3Clause,
// Eclipse Distribution License 1.0 is the same as BSD-3-Clause
// See https://wiki.spdx.org/view/Legal_Team/License_List/Licenses_Under_Consideration
"EDL 1.0": BSD3Clause,
"ECLIPSE DISTRIBUTION LICENSE - V 1.0": BSD3Clause,
"ECLIPSE DISTRIBUTION LICENSE V. 1.0": BSD3Clause,
"ECLIPSE DISTRIBUTION LICENSE V1.0": BSD3Clause,
"THE BSD LICENSE": BSD4Clause,

// APACHE
"APACHE LICENSE": Apache10,
"APACHE SOFTWARE LICENSES": Apache10,
"APACHE": Apache20, // 1? 2?
"APACHE 2.0": Apache20,
"APACHE 2": Apache20,
"APACHE V2": Apache20,
"APACHE 2.0 LICENSE": Apache20,
"APACHE SOFTWARE LICENSE, VERSION 2.0": Apache20,
"THE APACHE SOFTWARE LICENSE, VERSION 2.0": Apache20,
"APACHE LICENSE (V2.0)": Apache20,
"APACHE LICENSE 2.0": Apache20,
"APACHE LICENSE V2.0": Apache20,
"APACHE LICENSE VERSION 2.0": Apache20,
"APACHE LICENSE, VERSION 2.0": Apache20,
"APACHE PUBLIC LICENSE 2.0": Apache20,
"APACHE SOFTWARE LICENSE - VERSION 2.0": Apache20,
"THE APACHE LICENSE, VERSION 2.0": Apache20,
"APACHE-2.0 LICENSE": Apache20,
"APACHE 2 STYLE LICENSE": Apache20,
"ASF 2.0": Apache20,
"APACHE": Apache10,
"APACHE SOFTWARE LICENSES": Apache10,
"APACHE 2.0": Apache20,
"APACHE 2": Apache20,
"APACHE V2": Apache20,
"APACHE SOFTWARE LICENSE, VERSION 2.0": Apache20,
"APACHE LICENSE (V2.0)": Apache20,
"APACHE LICENSE 2.0": Apache20,
"APACHE LICENSE V2.0": Apache20,
"APACHE LICENSE VERSION 2.0": Apache20,
"APACHE LICENSE, VERSION 2.0": Apache20,
"APACHE PUBLIC LICENSE 2.0": Apache20,
"APACHE SOFTWARE LICENSE - VERSION 2.0": Apache20,
"APACHE-2.0": Apache20,
"APACHE 2 STYLE": Apache20,
"ASF 2.0": Apache20,

// CC0-1.0
"CC0 1.0 UNIVERSAL": CC010,
"PUBLIC DOMAIN, PER CREATIVE COMMONS CC0": CC010,

// CDDL 1.0
"CDDL 1.0": CDDL10,
"CDDL LICENSE": CDDL10,
"CDDL 1.0": CDDL10,
"CDDL": CDDL10,
"COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) VERSION 1.0": CDDL10,
"COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) V1.0": CDDL10,

Expand All @@ -144,13 +136,17 @@ var mapping = map[string]string{
"ECLIPSE PUBLIC LICENSE (EPL), VERSION 1.0": EPL10,

// EPL 2.0
"ECLIPSE PUBLIC LICENSE - VERSION 2.0": EPL20,
"EPL 2.0": EPL20,
"ECLIPSE PUBLIC LICENSE - V 2.0": EPL20,
"ECLIPSE PUBLIC LICENSE V2.0": EPL20,
"ECLIPSE PUBLIC LICENSE, VERSION 2.0": EPL20,
"THE ECLIPSE PUBLIC LICENSE VERSION 2.0": EPL20,
"ECLIPSE PUBLIC LICENSE V. 2.0": EPL20,
"ECLIPSE PUBLIC LICENSE 1.0": EPL10,
"ECLIPSE PUBLIC LICENSE - VERSION 2.0": EPL20,
"EPL 2.0": EPL20,
"ECLIPSE PUBLIC LICENSE - V 2.0": EPL20,
"ECLIPSE PUBLIC LICENSE V2.0": EPL20,
"ECLIPSE PUBLIC LICENSE, VERSION 2.0": EPL20,
"ECLIPSE PUBLIC LICENSE VERSION 2.0": EPL20,
"ECLIPSE PUBLIC LICENSE V. 2.0": EPL20,

// MIT No Attribution (MIT-0) is not yet supported by google/licenseclassifier
"MIT-0": MIT,

"RUBY": Ruby,
"ZLIB": Zlib,
Expand Down Expand Up @@ -181,7 +177,10 @@ var licenseSplitRegexp = regexp.MustCompile("(,?[_ ]+(?:or|and)[_ ]+)|(,[ ]*)")

func Normalize(name string) string {
name = strings.TrimSpace(name)
DmitriyLewen marked this conversation as resolved.
Show resolved Hide resolved
if l, ok := mapping[strings.ToUpper(name)]; ok {
name = strings.ToUpper(name)
name = strings.TrimPrefix(name, "THE ")
name = strings.TrimSuffix(name, " LICENSE")
if l, ok := Mapping[name]; ok {
return l
}
return name
Expand Down
34 changes: 34 additions & 0 deletions pkg/licensing/normalize_test.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,47 @@
package licensing_test

import (
"strings"
"testing"

"github.com/stretchr/testify/assert"

"github.com/aquasecurity/trivy/pkg/licensing"
)

// TestNormalize checks that licensing.Normalize collapses spelling variants of
// a license name into one identifier, and that every key of licensing.Mapping
// is already upper-case.
func TestNormalize(t *testing.T) {
	tests := []struct {
		licenses   []string // input spellings that must all normalize to the same value
		normalized string   // expected result of licensing.Normalize for each input
	}{
		{
			// Variants that differ only in letter case, surrounding
			// whitespace, a leading "The " and a trailing " License".
			licenses: []string{
				"APACHE",
				" APACHE ",
				"APACHE License",
				"The Apache License",
				"THE APACHE LICENSE",
				" THE APACHE LICENSE ",
			},
			normalized: "Apache-1.0",
		},
	}
	for _, tt := range tests {
		t.Run(tt.normalized, func(t *testing.T) {
			for _, ll := range tt.licenses {
				res := licensing.Normalize(ll)
				// Name the offending input: every variant shares one subtest,
				// so without it a failure would not say which spelling broke.
				assert.Equal(t, tt.normalized, res, "input: %q", ll)
			}
		})
	}
	t.Run("All mapping keys must be uppercase", func(t *testing.T) {
		// Normalize upper-cases the name before the map lookup, so a key that
		// contains lower-case letters could never be matched.
		for k := range licensing.Mapping {
			assert.Equal(t, strings.ToUpper(k), k, "mapping key %q is not upper-case", k)
		}
	})
}
DmitriyLewen marked this conversation as resolved.
Show resolved Hide resolved

func TestSplitLicenses(t *testing.T) {
tests := []struct {
name string
Expand Down