Skip to content

Commit 4926da7

Browse files
afdeskknqyf263
andauthoredSep 5, 2024··
fix(license): stop splitting a long license text (#7336)
Signed-off-by: knqyf263 <[email protected]> Co-authored-by: knqyf263 <[email protected]>
1 parent 7a1e8b8 commit 4926da7

File tree

11 files changed

+192
-72
lines changed

11 files changed

+192
-72
lines changed
 

‎pkg/dependency/parser/python/packaging/parse.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ func (p *Parser) Parse(r xio.ReadSeekerAt) ([]ftypes.Package, []ftypes.Dependenc
8080
}
8181

8282
if license == "" && h.Get("License-File") != "" {
83-
license = "file://" + h.Get("License-File")
83+
license = licensing.LicenseFilePrefix + h.Get("License-File")
8484
}
8585

8686
return []ftypes.Package{

‎pkg/fanal/analyzer/language/python/packaging/packaging.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,11 @@ func (a packagingAnalyzer) fillAdditionalData(fsys fs.FS, app *types.Application
122122
// Parser adds `file://` prefix to filepath from `License-File` field
123123
// We need to read this file to find licenses
124124
// Otherwise, this is the name of the license
125-
if !strings.HasPrefix(lic, "file://") {
125+
if !strings.HasPrefix(lic, licensing.LicenseFilePrefix) {
126126
licenses = append(licenses, lic)
127127
continue
128128
}
129-
licenseFilePath := path.Base(strings.TrimPrefix(lic, "file://"))
129+
licenseFilePath := path.Base(strings.TrimPrefix(lic, licensing.LicenseFilePrefix))
130130

131131
findings, err := classifyLicense(app.FilePath, licenseFilePath, a.licenseClassifierConfidenceLevel, fsys)
132132
if err != nil {

‎pkg/licensing/normalize.go

+45
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,12 @@ var mapping = map[string]string{
159159
"PUBLIC DOMAIN": Unlicense,
160160
}
161161

162+
// Markers used to tag license strings so that later stages can tell a license
// name apart from a license text or a reference to a license file.
const (
163+
// LicenseTextPrefix is prepended by SplitLicenses when the whole input looks
// like a license text rather than a list of license names.
LicenseTextPrefix = "text://"
164+
// LicenseFilePrefix is prepended to the path taken from a `License-File`
// metadata field, marking the value as a file to read instead of a name.
LicenseFilePrefix = "file://"
165+
// CustomLicensePrefix starts the display name that is generated for a
// license reported as text.
CustomLicensePrefix = "CUSTOM License"
166+
)
167+
162168
// pythonLicenseExceptions contains licenses that we cannot separate correctly using our logic.
163169
// first word after separator (or/and) => license name
164170
var pythonLicenseExceptions = map[string]string{
@@ -179,6 +185,39 @@ var pythonLicenseExceptions = map[string]string{
179185

180186
var licenseSplitRegexp = regexp.MustCompile("(,?[_ ]+(?:or|and)[_ ]+)|(,[ ]*)")
181187

188+
// licenseTextKeywords lists fragments that typically show up in the body of a
// license text but not in a license name. All entries are lowercase.
var licenseTextKeywords = []string{
	"http://",
	"https://",
	"(c)",
	"as-is",
	";",
	"hereby",
	"permission to use",
	"permission is",
	"use in source",
	"use, copy, modify",
	"using",
}

// isLicenseText reports whether str contains at least one of the
// licenseTextKeywords fragments. The comparison is case-sensitive, so the
// caller is expected to pass an already lowercased string.
func isLicenseText(str string) bool {
	matched := false
	// Stop scanning as soon as the first keyword is found.
	for idx := 0; idx < len(licenseTextKeywords) && !matched; idx++ {
		matched = strings.Contains(str, licenseTextKeywords[idx])
	}
	return matched
}
211+
212+
// TrimLicenseText returns a short preview of a license text: its first three
// whitespace-separated words (or fewer when the text is shorter), joined by
// single spaces and followed by "...".
//
// Words are split on any run of whitespace rather than on a single space, so
// leading blanks, repeated spaces, tabs and line breaks never end up inside
// the preview.
func TrimLicenseText(text string) string {
	const maxWords = 3

	words := strings.Fields(text)
	if len(words) > maxWords {
		words = words[:maxWords]
	}
	// The ellipsis is appended unconditionally to keep the established output
	// format, even when nothing was cut off.
	return strings.Join(words, " ") + "..."
}
220+
182221
func Normalize(name string) string {
183222
name = strings.TrimSpace(name)
184223
if l, ok := mapping[strings.ToUpper(name)]; ok {
@@ -191,6 +230,12 @@ func SplitLicenses(str string) []string {
191230
if str == "" {
192231
return nil
193232
}
233+
if isLicenseText(strings.ToLower(str)) {
234+
return []string{
235+
LicenseTextPrefix + str,
236+
}
237+
}
238+
194239
var licenses []string
195240
for _, maybeLic := range licenseSplitRegexp.Split(str, -1) {
196241
lower := strings.ToLower(maybeLic)

‎pkg/licensing/normalize_test.go

+7
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,13 @@ func TestSplitLicenses(t *testing.T) {
9797
"Historical Permission Notice and Disclaimer (HPND)",
9898
},
9999
},
100+
{
101+
name: "License text",
102+
license: "* Permission to use this software in any way is granted without",
103+
licenses: []string{
104+
"text://* Permission to use this software in any way is granted without",
105+
},
106+
},
100107
}
101108

102109
for _, tt := range tests {

‎pkg/rpc/convert.go

+2
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,7 @@ func ConvertFromRPCDetectedLicenses(rpcLicenses []*common.DetectedLicense) []typ
436436
PkgName: l.PkgName,
437437
FilePath: l.FilePath,
438438
Name: l.Name,
439+
Text: l.Text,
439440
Confidence: float64(l.Confidence),
440441
Link: l.Link,
441442
})
@@ -983,6 +984,7 @@ func ConvertToRPCLicenses(licenses []types.DetectedLicense) []*common.DetectedLi
983984
PkgName: l.PkgName,
984985
FilePath: l.FilePath,
985986
Name: l.Name,
987+
Text: l.Text,
986988
Confidence: float32(l.Confidence),
987989
Link: l.Link,
988990
})

‎pkg/rpc/convert_test.go

+4
Original file line numberDiff line numberDiff line change
@@ -760,6 +760,7 @@ func TestConvertFromRPCLicenses(t *testing.T) {
760760
PkgName: "alpine-baselayout",
761761
FilePath: "some-path",
762762
Name: "GPL-2.0",
763+
Text: "text://* Permission to use this software in any way is granted without",
763764
Confidence: 1,
764765
Link: "https://some-link",
765766
},
@@ -771,6 +772,7 @@ func TestConvertFromRPCLicenses(t *testing.T) {
771772
PkgName: "alpine-baselayout",
772773
FilePath: "some-path",
773774
Name: "GPL-2.0",
775+
Text: "text://* Permission to use this software in any way is granted without",
774776
Confidence: 1,
775777
Link: "https://some-link",
776778
},
@@ -806,6 +808,7 @@ func TestConvertToRPCLicenses(t *testing.T) {
806808
PkgName: "alpine-baselayout",
807809
FilePath: "some-path",
808810
Name: "GPL-2.0",
811+
Text: "text://* Permission to use this software in any way is granted without",
809812
Confidence: 1,
810813
Link: "https://some-link",
811814
},
@@ -817,6 +820,7 @@ func TestConvertToRPCLicenses(t *testing.T) {
817820
PkgName: "alpine-baselayout",
818821
FilePath: "some-path",
819822
Name: "GPL-2.0",
823+
Text: "text://* Permission to use this software in any way is granted without",
820824
Confidence: 1,
821825
Link: "https://some-link",
822826
},

‎pkg/scanner/local/scan.go

+29-19
Original file line numberDiff line numberDiff line change
@@ -261,14 +261,7 @@ func (s Scanner) scanLicenses(target types.ScanTarget, options types.ScanOptions
261261
var osPkgLicenses []types.DetectedLicense
262262
for _, pkg := range target.Packages {
263263
for _, license := range pkg.Licenses {
264-
category, severity := scanner.Scan(license)
265-
osPkgLicenses = append(osPkgLicenses, types.DetectedLicense{
266-
Severity: severity,
267-
Category: category,
268-
PkgName: pkg.Name,
269-
Name: license,
270-
Confidence: 1.0,
271-
})
264+
osPkgLicenses = append(osPkgLicenses, toDetectedLicense(scanner, license, pkg.Name, ""))
272265
}
273266
}
274267
results = append(results, types.Result{
@@ -282,17 +275,11 @@ func (s Scanner) scanLicenses(target types.ScanTarget, options types.ScanOptions
282275
var langLicenses []types.DetectedLicense
283276
for _, lib := range app.Packages {
284277
for _, license := range lib.Licenses {
285-
category, severity := scanner.Scan(license)
286-
langLicenses = append(langLicenses, types.DetectedLicense{
287-
Severity: severity,
288-
Category: category,
289-
PkgName: lib.Name,
290-
Name: license,
291-
// Lock files use app.FilePath - https://github.com/aquasecurity/trivy/blob/6ccc0a554b07b05fd049f882a1825a0e1e0aabe1/pkg/fanal/types/artifact.go#L245-L246
292-
// Applications use lib.FilePath - https://github.com/aquasecurity/trivy/blob/6ccc0a554b07b05fd049f882a1825a0e1e0aabe1/pkg/fanal/types/artifact.go#L93-L94
293-
FilePath: lo.Ternary(lib.FilePath != "", lib.FilePath, app.FilePath),
294-
Confidence: 1.0,
295-
})
278+
// Lock files use app.FilePath - https://github.com/aquasecurity/trivy/blob/6ccc0a554b07b05fd049f882a1825a0e1e0aabe1/pkg/fanal/types/artifact.go#L245-L246
279+
// Applications use lib.FilePath - https://github.com/aquasecurity/trivy/blob/6ccc0a554b07b05fd049f882a1825a0e1e0aabe1/pkg/fanal/types/artifact.go#L93-L94
280+
filePath := lo.Ternary(lib.FilePath != "", lib.FilePath, app.FilePath)
281+
282+
langLicenses = append(langLicenses, toDetectedLicense(scanner, license, lib.Name, filePath))
296283
}
297284
}
298285

@@ -390,6 +377,29 @@ func toDetectedMisconfiguration(res ftypes.MisconfResult, defaultSeverity dbType
390377
}
391378
}
392379

380+
func toDetectedLicense(scanner licensing.Scanner, license, pkgName, filePath string) types.DetectedLicense {
381+
var category ftypes.LicenseCategory
382+
var severity, licenseText string
383+
if strings.HasPrefix(license, licensing.LicenseTextPrefix) { // License text
384+
licenseText = strings.TrimPrefix(license, licensing.LicenseTextPrefix)
385+
category = ftypes.CategoryUnknown
386+
severity = dbTypes.SeverityUnknown.String()
387+
license = licensing.CustomLicensePrefix + ": " + licensing.TrimLicenseText(licenseText)
388+
} else { // License name
389+
category, severity = scanner.Scan(license)
390+
}
391+
392+
return types.DetectedLicense{
393+
Severity: severity,
394+
Category: category,
395+
PkgName: pkgName,
396+
FilePath: filePath,
397+
Name: license,
398+
Text: licenseText,
399+
Confidence: 1.0,
400+
}
401+
}
402+
393403
func ShouldScanMisconfigOrRbac(scanners types.Scanners) bool {
394404
return scanners.AnyEnabled(types.MisconfigScanner, types.RBACScanner)
395405
}

‎pkg/scanner/local/scan_test.go

+38
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,25 @@ var (
6464
},
6565
Licenses: []string{"MIT"},
6666
}
67+
python39min = ftypes.Package{
68+
Name: "python3.9-minimal",
69+
Version: "3.9.1",
70+
FilePath: "/usr/lib/python/site-packages/python3.9-minimal/METADATA",
71+
Layer: ftypes.Layer{
72+
DiffID: "sha256:0ea33a93585cf1917ba522b2304634c3073654062d5282c1346322967790ef33",
73+
},
74+
Licenses: []string{"text://Redistribution and use in source and binary forms, with or without"},
75+
}
76+
menuinstPkg = ftypes.Package{
77+
Name: "menuinst",
78+
Version: "2.0.2",
79+
FilePath: "opt/conda/lib/python3.11/site-packages/menuinst-2.0.2.dist-info/METADATA",
80+
Layer: ftypes.Layer{
81+
DiffID: "sha256:0ea33a93585cf1917ba522b2304634c3073654062d5282c1346322967790ef33",
82+
},
83+
Licenses: []string{"text://(c) 2016 Continuum Analytics, Inc. / http://continuum.io All Rights Reserved"},
84+
}
85+
6786
laravelPkg = ftypes.Package{
6887
Name: "laravel/framework",
6988
Version: "6.0.0",
@@ -225,6 +244,7 @@ func TestScanner_Scan(t *testing.T) {
225244
},
226245
Packages: []ftypes.Package{
227246
muslPkg,
247+
python39min,
228248
},
229249
Applications: []ftypes.Application{
230250
{
@@ -239,6 +259,7 @@ func TestScanner_Scan(t *testing.T) {
239259
FilePath: "",
240260
Packages: []ftypes.Package{
241261
urllib3Pkg,
262+
menuinstPkg,
242263
},
243264
},
244265
},
@@ -257,6 +278,14 @@ func TestScanner_Scan(t *testing.T) {
257278
Name: "MIT",
258279
Confidence: 1,
259280
},
281+
{
282+
Severity: "UNKNOWN",
283+
Category: "unknown",
284+
PkgName: python39min.Name,
285+
Name: "CUSTOM License: Redistribution and use...",
286+
Text: "Redistribution and use in source and binary forms, with or without",
287+
Confidence: 1,
288+
},
260289
},
261290
},
262291
{
@@ -286,6 +315,15 @@ func TestScanner_Scan(t *testing.T) {
286315
Name: "MIT",
287316
Confidence: 1,
288317
},
318+
{
319+
Severity: "UNKNOWN",
320+
Category: "unknown",
321+
PkgName: menuinstPkg.Name,
322+
FilePath: "opt/conda/lib/python3.11/site-packages/menuinst-2.0.2.dist-info/METADATA",
323+
Name: "CUSTOM License: (c) 2016 Continuum...",
324+
Text: "(c) 2016 Continuum Analytics, Inc. / http://continuum.io All Rights Reserved",
325+
Confidence: 1,
326+
},
289327
},
290328
},
291329
{

‎pkg/types/license.go

+3
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ type DetectedLicense struct {
2222
// Name holds a detected license name
2323
Name string
2424

25+
// Text holds a long license text if Trivy detects a license name as a license text
26+
Text string
27+
2528
// Confidence is level of the match. The confidence level is between 0.0 and 1.0, with 1.0 indicating an
2629
// exact match and 0.0 indicating a complete mismatch
2730
Confidence float64

0 commit comments

Comments
 (0)
Please sign in to comment.