1
1
# Copyright (C) 2021 Intel Corporation
2
2
# SPDX-License-Identifier: GPL-3.0-or-later
3
+ from __future__ import annotations
3
4
4
5
import json
5
6
import os
6
7
import subprocess
7
8
import sys
9
+ from logging import Logger
8
10
from re import MULTILINE , compile , search
9
- from typing import List
11
+ from typing import Iterator
10
12
11
13
import defusedxml .ElementTree as ET
12
14
15
+ from cve_bin_tool .checkers import Checker
13
16
from cve_bin_tool .cvedb import CVEDB
14
17
from cve_bin_tool .egg_updater import IS_DEVELOP , update_egg
15
18
from cve_bin_tool .error_handler import ErrorMode
16
- from cve_bin_tool .extractor import Extractor
19
+ from cve_bin_tool .extractor import Extractor , TempDirExtractorContext
17
20
from cve_bin_tool .file import is_binary
18
21
from cve_bin_tool .log import LOGGER
19
22
from cve_bin_tool .strings import Strings
20
- from cve_bin_tool .util import DirWalk , ProductInfo , inpath
23
+ from cve_bin_tool .util import DirWalk , ProductInfo , ScanInfo , inpath
21
24
22
25
if sys .version_info >= (3 , 8 ):
23
26
from importlib import metadata as importlib_metadata
@@ -36,12 +39,12 @@ class VersionScanner:
36
39
37
40
def __init__ (
38
41
self ,
39
- should_extract = False ,
40
- exclude_folders = [],
41
- checkers = None ,
42
- logger = None ,
43
- error_mode = ErrorMode .TruncTrace ,
44
- score = 0 ,
42
+ should_extract : bool = False ,
43
+ exclude_folders : list [ str ] = [],
44
+ checkers : dict [ str , type [ Checker ]] | None = None ,
45
+ logger : Logger | None = None ,
46
+ error_mode : ErrorMode = ErrorMode .TruncTrace ,
47
+ score : int = 0 ,
45
48
):
46
49
self .logger = logger or LOGGER .getChild (self .__class__ .__name__ )
47
50
# Update egg if installed in development mode
@@ -62,13 +65,13 @@ def __init__(
62
65
)
63
66
).walk
64
67
self .should_extract = should_extract
65
- self .file_stack = []
68
+ self .file_stack : list [ str ] = []
66
69
self .error_mode = error_mode
67
70
self .cve_db = CVEDB ()
68
71
# self.logger.info("Checkers loaded: %s" % (", ".join(self.checkers.keys())))
69
72
70
73
@classmethod
71
- def load_checkers (cls ):
74
+ def load_checkers (cls ) -> dict [ str , type [ Checker ]] :
72
75
"""Loads CVE checkers"""
73
76
checkers = dict (
74
77
map (
@@ -79,12 +82,12 @@ def load_checkers(cls):
79
82
return checkers
80
83
81
84
@classmethod
def available_checkers(cls) -> list[str]:
    """Return the names of all checkers registered under the checker entry point.

    The entry-point group is named by ``cls.CHECKER_ENTRYPOINT``; the
    ``name`` of every entry found in that group is returned.  An empty list
    is returned when the group has no registered entries.
    """
    discovered = importlib_metadata.entry_points()
    if hasattr(discovered, "select"):
        # Newer importlib.metadata: the dict-style ``entry_points()[group]``
        # lookup is deprecated and was removed in Python 3.12, so the group
        # has to be selected explicitly.
        checkers = discovered.select(group=cls.CHECKER_ENTRYPOINT)
    else:
        # Older versions return a plain mapping of group name -> entries.
        checkers = discovered.get(cls.CHECKER_ENTRYPOINT, [])
    return [item.name for item in checkers]
86
89
87
- def remove_skiplist (self , skips ) :
90
+ def remove_skiplist (self , skips : list [ str ]) -> None :
88
91
# Take out any checkers that are on the skip list
89
92
# (string of comma-delimited checker names)
90
93
skiplist = skips
@@ -95,20 +98,21 @@ def remove_skiplist(self, skips):
95
98
else :
96
99
self .logger .error (f"Checker { skipme } is not a valid checker name" )
97
100
98
def print_checkers(self) -> None:
    """Log the names of the currently loaded checkers at INFO level."""
    # Iterating a dict yields its keys, in the same order as ``.keys()``.
    checker_names = ", ".join(self.checkers)
    self.logger.info(f"Checkers: {checker_names}")
100
103
101
def number_of_checkers(self) -> int:
    """Return how many checkers are currently held in ``self.checkers``."""
    checker_count = len(self.checkers)
    return checker_count
103
106
104
- def is_executable (self , filename ) :
107
+ def is_executable (self , filename : str ) -> tuple [ bool , str | None ] :
105
108
"""check if file is an ELF binary file"""
106
109
107
- output = None
110
+ output : str | None = None
108
111
if inpath ("file" ):
109
112
# use system file if available (for performance reasons)
110
- output = subprocess .check_output (["file" , filename ])
111
- output = output .decode (sys .stdout .encoding )
113
+ output = subprocess .check_output (["file" , filename ]).decode (
114
+ sys .stdout .encoding
115
+ )
112
116
113
117
if "cannot open" in output :
114
118
self .logger .warning (f"Unopenable file { filename } cannot be scanned" )
@@ -133,7 +137,7 @@ def is_executable(self, filename):
133
137
134
138
return True , output
135
139
136
- def parse_strings (self , filename ) :
140
+ def parse_strings (self , filename : str ) -> str :
137
141
"""parse binary file's strings"""
138
142
139
143
if inpath ("strings" ):
@@ -145,7 +149,7 @@ def parse_strings(self, filename):
145
149
lines = s .parse ()
146
150
return lines
147
151
148
- def scan_file (self , filename ) :
152
+ def scan_file (self , filename : str ) -> Iterator [ ScanInfo ] :
149
153
"""Scans a file to see if it contains any of the target libraries,
150
154
and whether any of those contain CVEs"""
151
155
@@ -185,7 +189,9 @@ def scan_file(self, filename):
185
189
186
190
yield from self .run_checkers (filename , lines )
187
191
188
- def find_java_vendor (self , product , version ):
192
+ def find_java_vendor (
193
+ self , product : str , version : str
194
+ ) -> tuple [ProductInfo , str ] | tuple [None , None ]:
189
195
"""Find vendor for Java product"""
190
196
vendor_package_pair = self .cve_db .get_vendor_product_pairs (product )
191
197
# If no match, try alternative product name.
@@ -205,7 +211,7 @@ def find_java_vendor(self, product, version):
205
211
return ProductInfo (vendor , product , version ), file_path
206
212
return None , None
207
213
208
- def run_java_checker (self , filename : str ) -> None :
214
+ def run_java_checker (self , filename : str ) -> Iterator [ ScanInfo ] :
209
215
"""Process maven pom.xml file and extract product and dependency details"""
210
216
tree = ET .parse (filename )
211
217
# Find root element
@@ -231,7 +237,7 @@ def run_java_checker(self, filename: str) -> None:
231
237
if product is not None and version is not None :
232
238
product_info , file_path = self .find_java_vendor (product , version )
233
239
if file_path is not None :
234
- yield product_info , file_path
240
+ yield ScanInfo ( product_info , file_path )
235
241
236
242
# Scan for any dependencies referenced in file
237
243
dependencies = root .find (schema + "dependencies" )
@@ -249,16 +255,16 @@ def run_java_checker(self, filename: str) -> None:
249
255
product .text , version
250
256
)
251
257
if file_path is not None :
252
- yield product_info , file_path
258
+ yield ScanInfo ( product_info , file_path )
253
259
254
260
self .logger .debug (f"Done scanning file: { filename } " )
255
261
256
- def find_js_vendor (self , product : str , version : str ) -> List [ List [ str ]] :
262
+ def find_js_vendor (self , product : str , version : str ) -> list [ ScanInfo ] | None :
257
263
"""Find vendor for Javascript product"""
258
264
if version == "*" :
259
265
return None
260
266
vendor_package_pair = self .cve_db .get_vendor_product_pairs (product )
261
- vendorlist : List [ List [ str ] ] = []
267
+ vendorlist : list [ ScanInfo ] = []
262
268
if vendor_package_pair != []:
263
269
# To handle multiple vendors, return all combinations of product/vendor mappings
264
270
for v in vendor_package_pair :
@@ -268,20 +274,21 @@ def find_js_vendor(self, product: str, version: str) -> List[List[str]]:
268
274
if "^" in version :
269
275
version = version [1 :]
270
276
self .logger .debug (f"{ file_path } { product } { version } by { vendor } " )
271
- vendorlist .append ([ProductInfo (vendor , product , version ), file_path ])
277
+ vendorlist .append (
278
+ ScanInfo (ProductInfo (vendor , product , version ), file_path )
279
+ )
272
280
return vendorlist if len (vendorlist ) > 0 else None
273
281
return None
274
282
275
- def run_js_checker (self , filename : str ) -> None :
283
+ def run_js_checker (self , filename : str ) -> Iterator [ ScanInfo ] :
276
284
"""Process package-lock.json file and extract product and dependency details"""
277
285
fh = open (filename )
278
286
data = json .load (fh )
279
287
product = data ["name" ]
280
288
version = data ["version" ]
281
289
vendor = self .find_js_vendor (product , version )
282
290
if vendor is not None :
283
- for v in vendor :
284
- yield v [0 ], v [1 ] # product_info, file_path
291
+ yield from vendor
285
292
# Now process dependencies
286
293
for i in data ["dependencies" ]:
287
294
# To handle @actions/<product>: lines, extract product name from line
@@ -299,20 +306,20 @@ def run_js_checker(self, filename: str) -> None:
299
306
version = data ["dependencies" ][i ]
300
307
vendor = self .find_js_vendor (product , version )
301
308
if vendor is not None :
302
- for v in vendor :
303
- yield v [0 ], v [1 ] # product_info, file_path
309
+ yield from vendor
304
310
if "requires" in data ["dependencies" ][i ]:
305
311
for r in data ["dependencies" ][i ]["requires" ]:
306
312
# To handle @actions/<product>: lines, extract product name from line
307
313
product = r .split ("/" )[1 ] if "/" in r else r
308
314
version = data ["dependencies" ][i ]["requires" ][r ]
309
315
vendor = self .find_js_vendor (product , version )
310
316
if vendor is not None :
311
- for v in vendor :
312
- yield v [0 ], v [1 ] # product_info, file_path
317
+ yield from vendor
313
318
self .logger .debug (f"Done scanning file: { filename } " )
314
319
315
- def run_python_package_checkers (self , filename , lines ):
320
+ def run_python_package_checkers (
321
+ self , filename : str , lines : str
322
+ ) -> Iterator [ScanInfo ]:
316
323
"""
317
324
This generator runs only for python packages.
318
325
There are no actual checkers.
@@ -331,15 +338,15 @@ def run_python_package_checkers(self, filename, lines):
331
338
332
339
self .logger .info (f"{ file_path } is { product } { version } " )
333
340
334
- yield ProductInfo (vendor , product , version ), file_path
341
+ yield ScanInfo ( ProductInfo (vendor , product , version ), file_path )
335
342
336
343
# There are packages with a METADATA file in them containing different data from what the tool expects
337
344
except AttributeError :
338
345
self .logger .debug (f"{ filename } is an invalid METADATA/PKG-INFO" )
339
346
340
347
self .logger .debug (f"Done scanning file: { filename } " )
341
348
342
- def run_checkers (self , filename , lines ) :
349
+ def run_checkers (self , filename : str , lines : str ) -> Iterator [ ScanInfo ] :
343
350
# tko
344
351
for (dummy_checker_name , checker ) in self .checkers .items ():
345
352
checker = checker ()
@@ -370,12 +377,14 @@ def run_checkers(self, filename, lines):
370
377
f'{ file_path } { result ["is_or_contains" ]} { dummy_checker_name } { version } '
371
378
)
372
379
for vendor , product in checker .VENDOR_PRODUCT :
373
- yield ProductInfo (vendor , product , version ), file_path
380
+ yield ScanInfo (
381
+ ProductInfo (vendor , product , version ), file_path
382
+ )
374
383
375
384
self .logger .debug (f"Done scanning file: { filename } " )
376
385
377
386
@staticmethod
378
- def clean_file_path (filepath ) :
387
+ def clean_file_path (filepath : str ) -> str :
379
388
"""Returns a cleaner filepath by removing temp path from filepath"""
380
389
381
390
# we'll receive a filepath similar to
@@ -387,7 +396,9 @@ def clean_file_path(filepath):
387
396
start_point = filepath .find ("extracted" ) + 9
388
397
return filepath [start_point :]
389
398
390
- def scan_and_or_extract_file (self , ectx , filepath ):
399
+ def scan_and_or_extract_file (
400
+ self , ectx : TempDirExtractorContext , filepath : str
401
+ ) -> Iterator [ScanInfo ]:
391
402
"""Runs extraction if possible and desired otherwise scans."""
392
403
# Scan the file
393
404
yield from self .scan_file (filepath )
@@ -404,7 +415,7 @@ def scan_and_or_extract_file(self, ectx, filepath):
404
415
yield from self .scan_and_or_extract_file (ectx , filename )
405
416
self .file_stack .pop ()
406
417
407
- def recursive_scan (self , scan_path ) :
418
+ def recursive_scan (self , scan_path : str ) -> Iterator [ ScanInfo ] :
408
419
with Extractor (logger = self .logger , error_mode = self .error_mode ) as ectx :
409
420
if os .path .isdir (scan_path ):
410
421
for filepath in self .walker ([scan_path ]):
0 commit comments