diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml index c1d82f9..143212c 100644 --- a/.github/workflows/pythonapp.yml +++ b/.github/workflows/pythonapp.yml @@ -29,7 +29,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install .[dev] + pip install ".[dev]" - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names @@ -45,16 +45,3 @@ jobs: run: | pip uninstall -y orjson pytest --cov=vdb test - - name: Self sast-scan - uses: AppThreat/sast-scan-action@master - with: - output: reports - type: python,credscan - env: - SCAN_ID: ${{ github.sha }} - WORKSPACE: https://github.com/${{ github.repository }}/blob/${{ github.sha }} - - name: Upload scan reports - uses: actions/upload-artifact@v1.0.0 - with: - name: sast-scan-reports - path: reports diff --git a/.github/workflows/pythonpublish.yml b/.github/workflows/pythonpublish.yml index c5ac07f..0727f87 100644 --- a/.github/workflows/pythonpublish.yml +++ b/.github/workflows/pythonpublish.yml @@ -16,7 +16,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/pyproject.toml b/pyproject.toml index edb6564..abea3b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "appthreat-vulnerability-db" -version = "5.1.4" +version = "5.2.0" description = "AppThreat's vulnerability database and package search library with a built-in file based storage. OSV, CVE, GitHub, npm are the primary sources of vulnerabilities." authors = [ {name = "Team AppThreat", email = "cloud@appthreat.com"}, @@ -24,10 +24,10 @@ classifiers = [ "Intended Audience :: System Administrators", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.8", "Topic :: Security", "Topic :: Utilities", ] @@ -48,7 +48,8 @@ dev = [ "bandit", "flake8", "pytest", - "pytest-cov",] + "pytest-cov" +] [tool.setuptools] packages = ["test", "vdb", "vdb.lib"] diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index c744cad..0000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,7 +0,0 @@ --r requirements.txt - -black -bandit -flake8 -pytest -pytest-cov diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index f80d51b..0000000 --- a/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -httpx[http2] -appdirs -tabulate -msgpack==1.0.5 -orjson -semver>=3.0.0 -packageurl-python -cvss diff --git a/test/test_db.py b/test/test_db.py index bdfec95..0cb7f32 100644 --- a/test/test_db.py +++ b/test/test_db.py @@ -57,7 +57,7 @@ def test_search_slow(test_db, test_vuln_data): res = db.pkg_search( table, d["details"]["package"], - d["details"]["max_affected_version_including"], + d["details"]["mai"], ) assert len(res) assert res[0].to_dict()["package_issue"] @@ -74,7 +74,7 @@ def test_search_fast(test_db, test_vuln_data): search_list = [ { "name": d["details"]["package"], - "version": d["details"]["max_affected_version_including"], + "version": d["details"]["mai"], } for d in all_data ] @@ -96,7 +96,7 @@ def test_gha_search_slow(test_db, test_gha_data): all_data = db.list_all(table) assert all_data for d in all_data: - version = d["details"]["max_affected_version_including"] + version = d["details"]["mai"] if version and version != "*": res = db.pkg_search( table, @@ -117,7 +117,7 @@ def test_gha_vendor_search(test_db, test_gha_data): assert all_data for d in all_data: vendor, _, _, cve_type = parse_cpe(d["details"]["cpe_uri"]) - version = d["details"]["max_affected_version_including"] + version = d["details"]["mai"] if version and version != "*": res = db.vendor_pkg_search( table, @@ -140,10 +140,10 @@ def test_gha_search_bulk(test_db, test_gha_data): tmp_list = [ { "name": d["details"]["package"], - "version": d["details"]["max_affected_version_including"], + "version": d["details"]["mai"], } for d in all_data - if d["details"]["max_affected_version_including"] != "*" + if d["details"]["mai"] != "*" ] res = db.bulk_index_search(tmp_list) assert len(res) @@ -160,7 +160,7 @@ def test_index_search(test_db, test_vuln_data): assert all_data tmp_list = [] for d in all_data[:40]: - version = d["details"]["max_affected_version_including"] + version = d["details"]["mai"] if version and version != "*": tmp_list.append({"name": d["details"]["package"], "version": version}) res = db.bulk_index_search(tmp_list) @@ -187,7 +187,7 @@ def test_vendor_index_search(test_db, test_vuln_data): { "vendor": vendor, "name": d["details"]["package"], - "version": d["details"]["max_affected_version_including"], + "version": d["details"]["mai"], } ) res = db.bulk_index_search(tmp_list) diff --git a/vdb/cli.py b/vdb/cli.py index ef3d761..bee8a74 100644 --- a/vdb/cli.py +++ b/vdb/cli.py @@ -5,10 +5,11 @@ import logging import os import re +import shutil from tabulate import tabulate -from vdb.lib import config as config +from vdb.lib import config from vdb.lib import db as dbLib from vdb.lib.aqua import AquaSource from vdb.lib.gha import GitHubSource @@ -134,21 +135,18 @@ def print_results(results): ] for res in results: vuln_occ_dict = res.to_dict() - id = vuln_occ_dict.get("id") + vid = vuln_occ_dict.get("id") package_type = vuln_occ_dict.get("type") - if id not in added_list: + if vid not in added_list: package_issue = res.package_issue full_pkg = package_issue.affected_location.package if package_issue.affected_location.vendor: - full_pkg = "{}:{}".format( - package_issue.affected_location.vendor, - package_issue.affected_location.package, - ) + full_pkg = f"{package_issue.affected_location.vendor}:{package_issue.affected_location.package}" if package_type and package_type not in ("*", "o", "h"): full_pkg = package_type + ":" + full_pkg table.append( [ - id, + vid, full_pkg, package_issue.affected_location.version, package_issue.fixed_location, @@ -158,19 +156,17 @@ def print_results(results): vuln_occ_dict.get("short_description"), ] ) - added_list.append(id) + added_list.append(vid) print(tabulate(table, headers, tablefmt="grid")) def main(): + """Main function""" args = build_args() print(at_logo) if args.clean: if os.path.exists(config.data_dir): - try: - os.rmdir(config.data_dir) - except Exception: - pass + shutil.rmtree(config.data_dir, ignore_errors=True) if args.cache or args.cache_os: if args.only_osv: sources = [OSVSource()] @@ -181,19 +177,19 @@ def main(): if args.cache_os: sources.insert(0, AquaSource()) for s in sources: - LOG.info("Refreshing {}".format(s.__class__.__name__)) + LOG.info("Refreshing %s", s.__class__.__name__) s.refresh() elif args.sync: for s in [GitHubSource(), NvdSource()]: - LOG.info("Syncing {}".format(s.__class__.__name__)) + LOG.info("Syncing %s", s.__class__.__name__) s.download_recent() if args.sync_npm: for s in [NpmSource()]: - LOG.info("Syncing {}".format(s.__class__.__name__)) + LOG.info("Syncing %s", s.__class__.__name__) s.download_recent() if args.sync_github: for s in [GitHubSource()]: - LOG.info("Syncing {}".format(s.__class__.__name__)) + LOG.info("Syncing %s", s.__class__.__name__) s.download_recent() if args.search_npm: source = NpmSource() @@ -204,7 +200,7 @@ def main(): results = dbLib.list_all_occurrence(db) print_results(results) elif args.search: - LOG.info("Vulnerability database loaded from {}".format(config.vdb_bin_file)) + LOG.info("Vulnerability database loaded from %s", config.vdb_bin_file) db = dbLib.get() search_list = re.split(r"[,|;]", args.search) for pkg_info in search_list: diff --git a/vdb/lib/__init__.py b/vdb/lib/__init__.py index d5bbbfe..e5dc5bd 100644 --- a/vdb/lib/__init__.py +++ b/vdb/lib/__init__.py @@ -207,12 +207,12 @@ def __init__( # Occasionally, NVD CPE value could be invalid. We need to guard against this if parts: self.package = package if package else parts.group("package") - self.min_affected_version_including = ( + self.mii = ( min_affected_version_including if min_affected_version_including else parts.group("version") ) - self.max_affected_version_including = ( + self.mai = ( max_affected_version_including if max_affected_version_including else parts.group("version") @@ -225,20 +225,20 @@ def __init__( if len(cpe_parts) > 4: package_workaround = cpe_parts[4] self.package = package if package else package_workaround - self.min_affected_version_including = ( + self.mii = ( min_affected_version_including if min_affected_version_including else "*" ) - self.max_affected_version_including = ( + self.mai = ( max_affected_version_including if max_affected_version_including else "*" ) - self.min_affected_version_excluding = ( + self.mie = ( min_affected_version_excluding if min_affected_version_excluding else None ) - self.max_affected_version_excluding = ( + self.mae = ( max_affected_version_excluding if max_affected_version_excluding else None ) self.severity = Severity.from_str(severity) @@ -288,10 +288,10 @@ def from_dict(detail): return VulnerabilityDetail( detail.get("cpe_uri"), detail.get("package"), - detail.get("min_affected_version_including"), - detail.get("max_affected_version_including"), - detail.get("min_affected_version_excluding"), - detail.get("max_affected_version_excluding"), + detail.get("mii"), + detail.get("mai"), + detail.get("mie"), + detail.get("mae"), detail.get("severity"), detail.get("description"), detail.get("fixed_location"), @@ -308,21 +308,21 @@ def __init__( self, affected_location, fixed_location, - min_affected_version_including=None, - max_affected_version_including=None, - min_affected_version_excluding=None, - max_affected_version_excluding=None, + mii=None, + mai=None, + mie=None, + mae=None, ): self.affected_location = VulnerabilityLocation.from_values( affected_location, - min_affected_version_including, - max_affected_version_including, - min_affected_version_excluding, - max_affected_version_excluding, + mii, + mai, + mie, + mae, ) # If there is no fixed_location but there is max excluded version then consider that as the fix - if not fixed_location and max_affected_version_excluding: - self.fixed_location = max_affected_version_excluding + if not fixed_location and mae: + self.fixed_location = mae else: if fixed_location and fixed_location.startswith("cpe"): # Extract the fixed version from fixed_location cpe uri @@ -408,36 +408,26 @@ def __init__(self, cpe_uri, vendor, package, version): @staticmethod def from_values( cpe_uri, - min_affected_version_including=None, - max_affected_version_including=None, - min_affected_version_excluding=None, - max_affected_version_excluding=None, + mii=None, + mai=None, + mie=None, + mae=None, ): - if ( - not cpe_uri - and not min_affected_version_including - and not max_affected_version_including - and not min_affected_version_excluding - and not max_affected_version_excluding - ): + if not cpe_uri and not mii and not mai and not mie and not mae: return None if cpe_uri: parts = CPE_REGEX.match(cpe_uri) - version = ( - max_affected_version_including - if max_affected_version_including - else parts.group(3) - ) + version = mai if mai else parts.group(3) version_left = "" version_right = "" - if min_affected_version_excluding: - version_left = ">" + min_affected_version_excluding - if min_affected_version_including and min_affected_version_including != "*": - version_left = ">=" + min_affected_version_including - if max_affected_version_excluding: - version_right = "<" + max_affected_version_excluding - if max_affected_version_including and max_affected_version_including != "*": - version_right = "<=" + max_affected_version_including + if mie: + version_left = ">" + mie + if mii and mii != "*": + version_left = ">=" + mii + if mae: + version_right = "<" + mae + if mai and mai != "*": + version_right = "<=" + mai if version_left and not version_right: version = version_left # Convert >0.0.0 to * @@ -446,8 +436,8 @@ def from_values( elif not version_left and version_right: version = version_right elif version_left and version_right: - if min_affected_version_including == max_affected_version_including: - version = max_affected_version_including + if mii == mai: + version = mai else: version = "{}-{}".format(version_left, version_right) if parts: diff --git a/vdb/lib/aqua.py b/vdb/lib/aqua.py index ab647d8..bab2679 100644 --- a/vdb/lib/aqua.py +++ b/vdb/lib/aqua.py @@ -9,10 +9,10 @@ import httpx -from vdb.lib import CustomNamedTemporaryFile -from vdb.lib import config as config +from vdb.lib import CustomNamedTemporaryFile, config from vdb.lib.nvd import NvdSource from vdb.lib.utils import ( + compress_str, convert_score_severity, get_cvss3_from_vector, get_default_cve_data, @@ -232,7 +232,7 @@ def alsa_to_vuln(self, cve_data): ) try: vuln = NvdSource.convert_vuln(json_lib.loads(tdata)) - vuln.description = description + vuln.description = compress_str(description) ret_data.append(vuln) except Exception: pass @@ -310,7 +310,7 @@ def alas_rlsa_to_vuln(self, cve_data, vendor): ) try: vuln = NvdSource.convert_vuln(json_lib.loads(tdata)) - vuln.description = description + vuln.description = compress_str(description) ret_data.append(vuln) done_pkgs[pkg_key] = True except Exception: @@ -420,7 +420,7 @@ def ubuntu_to_vuln(self, cve_data): ) try: vuln = NvdSource.convert_vuln(json_lib.loads(tdata)) - vuln.description = description + vuln.description = compress_str(description) ret_data.append(vuln) except Exception: pass @@ -528,7 +528,7 @@ def redhat_to_vuln(self, cve_data): ) try: vuln = NvdSource.convert_vuln(json_lib.loads(tdata)) - vuln.description = description + vuln.description = compress_str(description) ret_data.append(vuln) done_pkgs[pkg_key] = True except Exception: @@ -590,7 +590,7 @@ def arch_to_vuln(self, cve_data): ) try: vuln = NvdSource.convert_vuln(json_lib.loads(tdata)) - vuln.description = description + vuln.description = compress_str(description) ret_data.append(vuln) except Exception: pass @@ -673,7 +673,7 @@ def suse_to_vuln(self, cve_data): ) try: vuln = NvdSource.convert_vuln(json_lib.loads(tdata)) - vuln.description = description + vuln.description = compress_str(description) ret_data.append(vuln) done_pkgs[pkg_key] = True except Exception: @@ -743,7 +743,7 @@ def photon_to_vuln(self, cve_data): ) try: vuln = NvdSource.convert_vuln(json_lib.loads(tdata)) - vuln.description = description + vuln.description = compress_str(description) ret_data.append(vuln) except Exception: pass @@ -871,7 +871,7 @@ def debian_to_vuln(self, cve_data): ) try: vuln = NvdSource.convert_vuln(json_lib.loads(tdata)) - vuln.description = description + vuln.description = compress_str(description) ret_data.append(vuln) except Exception: pass diff --git a/vdb/lib/config.py b/vdb/lib/config.py index 17efc17..b7d45c5 100644 --- a/vdb/lib/config.py +++ b/vdb/lib/config.py @@ -58,7 +58,6 @@ "github": "https://osv-vulnerabilities.storage.googleapis.com/GitHub%20Actions/all.zip", "android": "https://osv-vulnerabilities.storage.googleapis.com/Android/all.zip", "alpine": "https://osv-vulnerabilities.storage.googleapis.com/Alpine/all.zip", - "gsd": "https://osv-vulnerabilities.storage.googleapis.com/GSD/all.zip", "linux": "https://osv-vulnerabilities.storage.googleapis.com/Linux/all.zip", "debian": "https://osv-vulnerabilities.storage.googleapis.com/Debian/all.zip", "oss-fuzz": "https://osv-vulnerabilities.storage.googleapis.com/OSS-Fuzz/all.zip", @@ -80,3 +79,10 @@ # This is highly important for debian where a specific distro may be non-vulnerable # While CPEs are the correct method of representing the exclusion, this version hack is aimed to be a short workaround placeholder_exclude_version = "88.88.8" + +# How many CVEs should be packed and written to the db file as a unit +# A large value here requires a larger max_buffer_size. Else could lead to msgpack.exceptions.BufferFull exceptions during read +batch_write_size = 20 + +# Limits size of unpacked data +max_buffer_size = 200 * 1024 * 1024 # 200 MiB diff --git a/vdb/lib/db.py b/vdb/lib/db.py index 0057411..72764f5 100644 --- a/vdb/lib/db.py +++ b/vdb/lib/db.py @@ -1,7 +1,6 @@ from collections import defaultdict -from vdb.lib import config as config -from vdb.lib import storage as storage +from vdb.lib import config, storage from vdb.lib.utils import convert_to_occurrence, parse_cpe, version_compare index_data = None @@ -25,12 +24,12 @@ def build_index(index_pos_list): for d in dp.get("index_list"): cve_id = d.get("id") min_version = d.get( - "min_affected_version_excluding", - d.get("min_affected_version_including"), + "mie", + d.get("mii"), ) max_version = d.get( - "max_affected_version_excluding", - d.get("max_affected_version_including"), + "mae", + d.get("mai"), ) if not min_version: min_version = "0" @@ -125,28 +124,20 @@ def _key_func(data, match_list): return False cpe_uri = data["details"].get("cpe_uri") package = data["details"].get("package") - min_affected_version_including = data["details"].get( - "min_affected_version_including", "0" - ) - min_affected_version_excluding = data["details"].get( - "min_affected_version_excluding", None - ) - max_affected_version_including = data["details"].get( - "max_affected_version_including", "*" - ) - max_affected_version_excluding = data["details"].get( - "max_affected_version_excluding", None - ) + min_affected_version_including = data["details"].get("mii", "0") + min_affected_version_excluding = data["details"].get("mie", None) + max_affected_version_including = data["details"].get("mai", "*") + max_affected_version_excluding = data["details"].get("mae", None) else: cpe_uri = data.details.cpe_uri package = data.details.package - min_affected_version_including = data.details.min_affected_version_including - max_affected_version_including = data.details.max_affected_version_including - min_affected_version_excluding = data.details.min_affected_version_excluding - max_affected_version_excluding = data.details.max_affected_version_excluding + min_affected_version_including = data.details.mii + max_affected_version_including = data.details.mai + min_affected_version_excluding = data.details.mie + max_affected_version_excluding = data.details.mae if not cpe_uri: return False - vendor, _, _, cve_type = parse_cpe(cpe_uri) + vendor, _, _, _ = parse_cpe(cpe_uri) for match in match_list: name_ver = match.split("|") # Search by name and version diff --git a/vdb/lib/gha.py b/vdb/lib/gha.py index 5c2df7d..a707250 100755 --- a/vdb/lib/gha.py +++ b/vdb/lib/gha.py @@ -23,9 +23,9 @@ import httpx -from vdb.lib import config as config +from vdb.lib import config from vdb.lib.nvd import NvdSource -from vdb.lib.utils import get_default_cve_data +from vdb.lib.utils import compress_str, get_default_cve_data logging.basicConfig( level=logging.INFO, format="%(levelname)s [%(asctime)s] %(message)s" @@ -33,7 +33,7 @@ LOG = logging.getLogger(__name__) api_token = os.environ.get("GITHUB_TOKEN") -headers = {"Authorization": "token %s" % api_token} +headers = {"Authorization": f"token {api_token}"} json_lib = orjson if ORJSON_AVAILABLE else json @@ -119,9 +119,7 @@ def download_recent(self, local_store=True): def fetch(self, type): """Private method to fetch the advisory data via GraphQL api""" LOG.debug( - "Download GitHub advisory from {} with cursor {}".format( - config.gha_url, type - ) + "Download GitHub advisory from %s with cursor %s", config.gha_url, type ) client = httpx.Client(http2=True, follow_redirects=True, timeout=180) r = client.post(url=config.gha_url, json=get_query(type=type), headers=headers) @@ -271,7 +269,7 @@ def convert(self, cve_data): try: tdata_json = json_lib.loads(tdata) vuln = NvdSource.convert_vuln(tdata_json) - vuln.description = description + vuln.description = compress_str(description) ret_data.append(vuln) except Exception as e: LOG.debug(e) diff --git a/vdb/lib/npm.py b/vdb/lib/npm.py index 867631e..d45ac48 100644 --- a/vdb/lib/npm.py +++ b/vdb/lib/npm.py @@ -16,9 +16,10 @@ import httpx -from vdb.lib import config as config +from vdb.lib import config from vdb.lib.nvd import NvdSource from vdb.lib.utils import ( + compress_str, convert_md_references, convert_to_occurrence, fix_text, @@ -72,7 +73,7 @@ def bulk_search(self, app_info, pkg_list): def fetch(self, payload): client = httpx.Client(http2=True, follow_redirects=True, timeout=180) - LOG.debug("Fetch npm advisory from {}".format(config.npm_audit_url)) + LOG.debug("Fetch npm advisory from %s", config.npm_audit_url) r = client.post(url=config.npm_audit_url, json=payload) json_data = r.json() return self.convert(json_data) @@ -267,7 +268,7 @@ def to_vuln(self, v, ret_data): ) try: vuln = NvdSource.convert_vuln(json_lib.loads(tdata)) - vuln.description = description + vuln.description = compress_str(description) ret_data.append(vuln) except Exception as e: LOG.debug(e) diff --git a/vdb/lib/nvd.py b/vdb/lib/nvd.py index ddc2167..6a5ac87 100644 --- a/vdb/lib/nvd.py +++ b/vdb/lib/nvd.py @@ -19,8 +19,8 @@ Vulnerability, VulnerabilityDetail, VulnerabilitySource, + config, ) -from vdb.lib import config as config from vdb.lib import db as dbLib logging.basicConfig( @@ -76,7 +76,7 @@ def download_recent(self, local_store=True): def fetch(self, year): """Private Method which downloads the given CVE gzip from NVD""" url = config.nvd_url % dict(year=year) - LOG.debug("Download NVD CVE from {}".format(url)) + LOG.debug("Download NVD CVE from %s", url) with CustomNamedTemporaryFile() as tf: try: with httpx.stream("GET", url, follow_redirects=True, timeout=180) as r: @@ -84,7 +84,7 @@ def fetch(self, year): tf.write(chunk) tf.flush() except Exception: - logging.warning(f"Exception while downloading NVD feed from {url}") + logging.warning("Exception while downloading NVD feed from %s", url) return None with gzip.open(tf.name, "rb") as gzipjf: try: @@ -93,7 +93,8 @@ def fetch(self, year): return self.convert(json_data) except Exception: logging.warning( - f"Exception while parsing NVD CVE feed for {year}. Please try after some time" + "Exception while parsing NVD CVE feed for %s. Please try after some time", + year, ) return None @@ -203,18 +204,10 @@ def convert_vuln_detail(vuln): continue if cpe["vulnerable"] and cpe.get("cpe23Uri"): detail["cpe_uri"] = cpe["cpe23Uri"] - detail["min_affected_version_including"] = cpe.get( - "versionStartIncluding" - ) - detail["min_affected_version_excluding"] = cpe.get( - "versionStartExcluding" - ) - detail["max_affected_version_including"] = cpe.get( - "versionEndIncluding" - ) - detail["max_affected_version_excluding"] = cpe.get( - "versionEndExcluding" - ) + detail["mii"] = cpe.get("versionStartIncluding") + detail["mie"] = cpe.get("versionStartExcluding") + detail["mai"] = cpe.get("versionEndIncluding") + detail["mae"] = cpe.get("versionEndExcluding") detail["source_update_time"] = vuln["lastModifiedDate"] cpe_details_list.append(detail) else: # cpe is not vulnerable diff --git a/vdb/lib/osv.py b/vdb/lib/osv.py index d60c6d2..6d4f9e1 100644 --- a/vdb/lib/osv.py +++ b/vdb/lib/osv.py @@ -7,10 +7,10 @@ import httpx -from vdb.lib import CustomNamedTemporaryFile -from vdb.lib import config as config +from vdb.lib import CustomNamedTemporaryFile, config from vdb.lib.nvd import NvdSource from vdb.lib.utils import ( + compress_str, convert_score_severity, get_cvss3_from_vector, get_default_cve_data, @@ -288,7 +288,7 @@ def to_vuln(self, cve_data): ) try: vuln = NvdSource.convert_vuln(json_lib.loads(tdata)) - vuln.description = description + vuln.description = compress_str(description) ret_data.append(vuln) except Exception: pass diff --git a/vdb/lib/storage.py b/vdb/lib/storage.py index e7e8fbe..592979b 100644 --- a/vdb/lib/storage.py +++ b/vdb/lib/storage.py @@ -2,12 +2,12 @@ import msgpack -from vdb.lib import config as config +from vdb.lib import config from vdb.lib.utils import chunk_list, parse_cpe, serialize_vuln_list -read_size = 256 -batch_write_size = 20 -max_buffer_size = 10 * 1024 * 1024 # 10 MiB +batch_write_size = config.batch_write_size + +max_buffer_size = config.max_buffer_size def store(datas, db_file=config.vdb_bin_file, index_file=config.vdb_bin_index): @@ -17,29 +17,22 @@ def store(datas, db_file=config.vdb_bin_file, index_file=config.vdb_bin_index): :param db_file: DB file to use """ data_list = serialize_vuln_list(datas) + packed_obj = None for batch in chunk_list(data_list, batch_write_size): index_list = [] for data in batch: if data["details"]["cpe_uri"]: - vendor, _, _, cve_type = parse_cpe(data["details"]["cpe_uri"]) + vendor, _, _, _ = parse_cpe(data["details"]["cpe_uri"]) if vendor: index_list.append( { "id": data.get("id"), "vendor": vendor.lower(), "name": data["details"]["package"].lower(), - "min_affected_version_including": data["details"].get( - "min_affected_version_including" - ), - "max_affected_version_including": data["details"].get( - "max_affected_version_including" - ), - "min_affected_version_excluding": data["details"].get( - "min_affected_version_excluding" - ), - "max_affected_version_excluding": data["details"].get( - "max_affected_version_excluding" - ), + "mii": data["details"].get("mii"), + "mai": data["details"].get("mai"), + "mie": data["details"].get("mie"), + "mae": data["details"].get("mae"), } ) packed_obj = msgpack.packb(batch, use_bin_type=True) diff --git a/vdb/lib/utils.py b/vdb/lib/utils.py index a4328a3..d63da71 100644 --- a/vdb/lib/utils.py +++ b/vdb/lib/utils.py @@ -1,3 +1,4 @@ +import codecs import importlib import re import string @@ -63,6 +64,8 @@ def _load(d): for item in d: li.append(_load(item)) return li + elif isinstance(d, str) and ("\\n" in d or "\\t" in d): + return decompress_str(d) elif isinstance(d, dict) and "type" in d: # object t = d["type"] if t == "datetime": @@ -131,6 +134,7 @@ def _dump(obj, path): return d elif ( isinstance(obj, str) + or isinstance(obj, bytes) or isinstance(obj, int) or isinstance(obj, float) or isinstance(obj, complex) @@ -158,7 +162,7 @@ def serialize_vuln_list(datas): for data in datas: ddata = data details = None - if type(data) != "dict": + if not isinstance(data, dict): ddata = vars(data) details = data.details else: @@ -372,15 +376,15 @@ def is_hash_mode( compare_ver, min_version, max_version, - min_affected_version_excluding, - max_affected_version_excluding, + mie, + mae, ): return ( checkHex(compare_ver) or checkHex(min_version) or checkHex(max_version) - or checkHex(min_affected_version_excluding) - or checkHex(max_affected_version_excluding) + or checkHex(mie) + or checkHex(mae) ) @@ -388,15 +392,15 @@ def is_epoch_mode( compare_ver, min_version, max_version, - min_affected_version_excluding, - max_affected_version_excluding, + mie, + mae, ): return ( checkEpoch(compare_ver) or checkEpoch(min_version) or checkEpoch(max_version) - or checkEpoch(min_affected_version_excluding) - or checkEpoch(max_affected_version_excluding) + or checkEpoch(mie) + or checkEpoch(mae) ) @@ -414,8 +418,8 @@ def trim_epoch( compare_ver, min_version, max_version, - min_affected_version_excluding, - max_affected_version_excluding, + mie, + mae, ): if checkEpoch(compare_ver): compare_ver = trim_epoch_colon(compare_ver) @@ -423,20 +427,16 @@ def trim_epoch( min_version = trim_epoch_colon(min_version) if checkEpoch(max_version): max_version = trim_epoch_colon(max_version) - if checkEpoch(min_affected_version_excluding): - min_affected_version_excluding = trim_epoch_colon( - min_affected_version_excluding - ) - if checkEpoch(max_affected_version_excluding): - max_affected_version_excluding = trim_epoch_colon( - max_affected_version_excluding - ) + if checkEpoch(mie): + mie = trim_epoch_colon(mie) + if checkEpoch(mae): + mae = trim_epoch_colon(mae) return ( compare_ver, min_version, max_version, - min_affected_version_excluding, - max_affected_version_excluding, + mie, + mae, ) @@ -444,8 +444,8 @@ def version_compare( compare_ver, min_version, max_version, - min_affected_version_excluding=None, - max_affected_version_excluding=None, + mie=None, + mae=None, ): """Function to check if the given version is between min and max version @@ -459,63 +459,50 @@ def version_compare( True """ # Handle placeholder fix version - if max_affected_version_excluding == placeholder_fix_version and compare_ver: + if mae == placeholder_fix_version and compare_ver: return True # Fix min versions that are erroneously sent as * - if ( - min_version - and min_version == "*" - and not min_affected_version_excluding - and max_affected_version_excluding - and "." in max_affected_version_excluding - ): + if min_version and min_version == "*" and not mie and mae and "." in mae: min_version = 0 hash_mode_detected = is_hash_mode( compare_ver, min_version, max_version, - min_affected_version_excluding, - max_affected_version_excluding, + mie, + mae, ) # Debian OS packages could have epoch. Detect and extract the upstream version epoch_mode_detected = is_epoch_mode( compare_ver, min_version, max_version, - min_affected_version_excluding, - max_affected_version_excluding, + mie, + mae, ) ubuntu_mode_detected = False if epoch_mode_detected: # Easy check - if ( - compare_ver - and max_affected_version_excluding - and compare_ver == max_affected_version_excluding - ): + if compare_ver and mae and compare_ver == mae: return False ( tcompare_ver, tmin_version, tmax_version, - tmin_affected_version_excluding, - tmax_affected_version_excluding, + tmie, + tmae, ) = trim_epoch( compare_ver, min_version, max_version, - min_affected_version_excluding, - max_affected_version_excluding, + mie, + mae, ) # 1.10-0ubuntu4 < 1.10-0ubuntu4.1 if ( tcompare_ver == tmax_version - or tcompare_ver == tmax_affected_version_excluding + or tcompare_ver == tmae or (max_version and max_version.startswith(compare_ver)) - or ( - max_affected_version_excluding - and max_affected_version_excluding.startswith(compare_ver) - ) + or (mae and mae.startswith(compare_ver)) ): if ( max_version @@ -523,11 +510,7 @@ def version_compare( and max_version != compare_ver ): return True - if ( - max_affected_version_excluding - and max_affected_version_excluding.startswith(compare_ver) - and max_affected_version_excluding != compare_ver - ): + if mae and mae.startswith(compare_ver) and mae != compare_ver: return True # Sorry about this but ubuntu versioning scheme is a PITA if "ubuntu" in compare_ver or "build" in compare_ver or "deb" in compare_ver: @@ -536,10 +519,8 @@ def version_compare( compare_ver = compare_ver.split(":")[-1] if max_version and ":" in max_version: max_version = max_version.split(":")[-1] - if max_affected_version_excluding and ":" in max_affected_version_excluding: - max_affected_version_excluding = max_affected_version_excluding.split( - ":" - )[-1] + if mae and ":" in mae: + mae = mae.split(":")[-1] if "ubuntu" in compare_ver: tmpcvArr = compare_ver.split("ubuntu") elif "deb" in compare_ver: @@ -563,80 +544,57 @@ def version_compare( max_version = tmpmv if max_version and "-" in max_version: max_version = max_version.split("-")[0] - elif max_affected_version_excluding and ( - "ubuntu" in max_affected_version_excluding - or "deb" in max_affected_version_excluding - or "-" in max_affected_version_excluding - ): + elif mae and ("ubuntu" in mae or "deb" in mae or "-" in mae): indexToUse = 0 - if max_affected_version_excluding.startswith(tmpcvArr[0]): + if mae.startswith(tmpcvArr[0]): indexToUse = -1 for bstr in ("ubuntu", "deb", "-"): - if bstr in max_affected_version_excluding: - tmpmvArr = max_affected_version_excluding.split(bstr) + if bstr in mae: + tmpmvArr = mae.split(bstr) # If the prefix is equal after splitting operate with suffix alone if tmpmvArr[0] == tmpcvArr[0]: indexToUse = -1 tmpmv = tmpmvArr[indexToUse] break compare_ver = tmpcvArr[indexToUse] - max_affected_version_excluding = tmpmv - if ( - max_affected_version_excluding - and "-" in max_affected_version_excluding - ): - max_affected_version_excluding = ( - max_affected_version_excluding.split("-")[0] - ) + mae = tmpmv + if mae and "-" in mae: + mae = mae.split("-")[0] if "-" in compare_ver: compare_ver = compare_ver.split("-")[0] # If after splitting the versions are equal return False if (max_version and compare_ver == max_version) or ( - max_affected_version_excluding - and compare_ver == max_affected_version_excluding + mae and compare_ver == mae ): return False - if max_affected_version_excluding: - if VersionInfo.is_valid(compare_ver) and VersionInfo.is_valid( - max_affected_version_excluding - ): - cmp_value = VersionInfo.parse(compare_ver).compare( - max_affected_version_excluding - ) + if mae: + if VersionInfo.is_valid(compare_ver) and VersionInfo.is_valid(mae): + cmp_value = VersionInfo.parse(compare_ver).compare(mae) return cmp_value < 0 - elif ( - "." not in compare_ver and "." not in max_affected_version_excluding - ): + elif "." not in compare_ver and "." not in mae: compare_ver = re.split(r"[+~]", compare_ver)[0] - max_affected_version_excluding = re.split( - r"[+~]", max_affected_version_excluding - )[0] + mae = re.split(r"[+~]", mae)[0] exnum = list(filter(str.isdigit, compare_ver)) if exnum: compare_ver_restnum = int("".join(exnum)) - exnum = list( - filter(str.isdigit, max_affected_version_excluding) - ) + exnum = list(filter(str.isdigit, mae)) if exnum: - max_affected_version_excluding_restnum = int("".join(exnum)) - return ( - compare_ver_restnum - < max_affected_version_excluding_restnum - ) + mae_restnum = int("".join(exnum)) + return compare_ver_restnum < mae_restnum if not ubuntu_mode_detected: compare_ver = tcompare_ver min_version = tmin_version max_version = tmax_version - min_affected_version_excluding = tmin_affected_version_excluding - max_affected_version_excluding = tmax_affected_version_excluding + mie = tmie + mae = tmae # Semver compatible and including versions provided is_min_exclude = False is_max_exclude = False - if (not min_version or min_version == "*") and min_affected_version_excluding: - min_version = min_affected_version_excluding + if (not min_version or min_version == "*") and mie: + min_version = mie is_min_exclude = True - if (not max_version or max_version == "*") and max_affected_version_excluding: - max_version = max_affected_version_excluding + if (not max_version or max_version == "*") and mae: + max_version = mae is_max_exclude = True if not min_version: min_version = "0" @@ -826,9 +784,9 @@ def version_compare( return True if compare_ver == max_version: return True - if compare_ver == min_affected_version_excluding: + if compare_ver == mie: return False - if compare_ver == max_affected_version_excluding: + if compare_ver == mae: return False return False return True @@ -921,10 +879,10 @@ def convert_to_occurrence(datas): package_issue=PackageIssue( affected_location=cpe_uri, fixed_location=vdetails.fixed_location, - min_affected_version_including=vdetails.min_affected_version_including, - max_affected_version_including=vdetails.max_affected_version_including, - min_affected_version_excluding=vdetails.min_affected_version_excluding, - max_affected_version_excluding=vdetails.max_affected_version_excluding, + mii=vdetails.mii, + mai=vdetails.mai, + mie=vdetails.mie, + mae=vdetails.mae, ), short_description=vobj["description"], long_description=None, @@ -1015,3 +973,15 @@ def convert_score_severity(score): def chunk_list(lst, size): for i in range(0, len(lst), size): yield lst[i : i + size] + + +def compress_str(s): + """Compress string by replacing for newlines and tabs""" + return s.strip().replace("\n", "\\n").replace(" ", "\\t") + + +def decompress_str(s): + """Decompress string by decoding escape characters""" + if isinstance(s, str): + s = bytes(s, "utf-8") + return codecs.escape_decode(s)[0].decode("utf-8")