From 14ea397feb9f923f813479721888474313084197 Mon Sep 17 00:00:00 2001 From: Jacek Puchta Date: Wed, 25 Oct 2023 19:15:10 +0200 Subject: [PATCH] python services Signed-off-by: Prabhu Subramanian Update packages. Add sample test for python evinse Signed-off-by: Prabhu Subramanian Integrate evidence into cdxgen Signed-off-by: Prabhu Subramanian --- .github/workflows/app-release.yml | 2 +- .github/workflows/dockertests.yml | 12 +- .github/workflows/nodejs.yml | 2 +- .github/workflows/python-atom-tests.yml | 2 +- .github/workflows/repotests.yml | 13 +- README.md | 5 +- bin/cdxgen.js | 72 ++++++++-- bin/evinse.js | 19 +-- data/frameworks-list.json | 40 ++++-- data/pypi-pkg-aliases.json | 6 + docs/ADVANCED.md | 23 ++- docs/CLI.md | 5 +- evinser.js | 181 ++++++++++++++++++------ index.js | 43 ++++-- package-lock.json | 12 +- package.json | 4 +- utils.js | 116 +++++++++++---- utils.test.js | 2 +- 18 files changed, 405 insertions(+), 154 deletions(-) diff --git a/.github/workflows/app-release.yml b/.github/workflows/app-release.yml index 8788b03d1..4a378579e 100644 --- a/.github/workflows/app-release.yml +++ b/.github/workflows/app-release.yml @@ -14,7 +14,7 @@ jobs: - name: Use Node.js uses: actions/setup-node@v3 with: - node-version: 20.5 + node-version: '21.x' - name: Install dependencies run: | sudo apt-get install -y python3.8 python3.8-dev python3-pip python3-testresources python3-setuptools patchelf desktop-file-utils libgdk-pixbuf2.0-dev diff --git a/.github/workflows/dockertests.yml b/.github/workflows/dockertests.yml index 5262f2082..8648c7367 100644 --- a/.github/workflows/dockertests.yml +++ b/.github/workflows/dockertests.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - node-version: [18.x] + node-version: ['21.x'] java-version: ['19'] steps: - uses: actions/checkout@v4 @@ -22,7 +22,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Set up JDK uses: actions/setup-java@v3 with: @@ -68,7 +68,7 @@ jobs: strategy: matrix: - node-version: [18.x] + node-version: ['21.x'] java-version: ['19'] steps: - uses: actions/checkout@v4 @@ -79,7 +79,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Set up JDK uses: actions/setup-java@v3 with: @@ -108,7 +108,7 @@ jobs: strategy: matrix: - node-version: [18.x] + node-version: ['21.x'] java-version: ['19'] steps: - uses: actions/checkout@v4 @@ -119,7 +119,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Set up JDK uses: actions/setup-java@v3 with: diff --git a/.github/workflows/nodejs.yml b/.github/workflows/nodejs.yml index 3a1ba4011..ca1345ed8 100644 --- a/.github/workflows/nodejs.yml +++ b/.github/workflows/nodejs.yml @@ -15,7 +15,7 @@ jobs: strategy: matrix: - node-version: [16.x, 18.x, 20.x] + node-version: ['16.x', '18.x', '20.x', '21.x'] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/python-atom-tests.yml b/.github/workflows/python-atom-tests.yml index 9f62f84c5..d207f02cb 100644 --- a/.github/workflows/python-atom-tests.yml +++ b/.github/workflows/python-atom-tests.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - node-version: [18.x] + node-version: ['21.x'] steps: - uses: actions/checkout@v4 with: diff --git a/.github/workflows/repotests.yml b/.github/workflows/repotests.yml index c5b950517..3ad09f2f3 100644 --- a/.github/workflows/repotests.yml +++ b/.github/workflows/repotests.yml @@ -11,7 +11,7 @@ jobs: strategy: fail-fast: false matrix: - node-version: [18.x] + node-version: ['21.x'] os: ['ubuntu-latest', 'windows-latest'] runs-on: ${{ matrix.os }} steps: @@ -148,18 +148,21 @@ jobs: with: repository: 'hoolicorp/java-sec-code' path: 'repotests/java-sec-code' + - uses: actions/checkout@v3 + with: + repository: 'DefectDojo/django-DefectDojo' + path: 'repotests/django-DefectDojo' - uses: dtolnay/rust-toolchain@stable - name: repotests run: | - bin/cdxgen.js -p -t js --no-recurse -o bom.json . - bin/evinse.js -l js -i bom.json -o bom.evinse.json --with-reachables . + bin/cdxgen.js -p -t js --no-recurse -o bom.json --evidence . bin/cdxgen.js -p -t java repotests/java-sec-code -o bomresults/bom-java-sec-code.json bin/cdxgen.js -p -t java --author foo --author bar repotests/java-sec-code -o bomresults/bom-java-sec-code.json bin/cdxgen.js -p -t java repotests/java-sec-code -o bomresults/bom-java-sec-code.json --required-only bin/cdxgen.js -p -t java repotests/java-sec-code -o bomresults/bom-java-sec-code.json --filter postgres --filter json bin/cdxgen.js -p -t java repotests/java-sec-code -o bomresults/bom-java-sec-code.json --only spring - bin/cdxgen.js -p -t java repotests/java-sec-code -o repotests/java-sec-code/bom.json --deep - node bin/evinse.js -i repotests/java-sec-code/bom.json -o bomresults/java-sec-code.evinse.json -l java --with-reachables -p repotests/java-sec-code + bin/cdxgen.js -p -t java repotests/java-sec-code -o repotests/java-sec-code/bom.json --deep --evidence + bin/cdxgen.js -t python repotests/django-DefectDojo -o repotests/django-DefectDojo/bom.json --deep --evidence bin/cdxgen.js -p -r -t java repotests/shiftleft-java-example -o bomresults/bom-java.json --generate-key-and-sign node bin/evinse.js -i bomresults/bom-java.json -o bomresults/bom-java.evinse.json -l java --with-data-flow -p repotests/shiftleft-java-example SBOM_SIGN_ALGORITHM=RS512 SBOM_SIGN_PRIVATE_KEY=bomresults/private.key SBOM_SIGN_PUBLIC_KEY=bomresults/public.key bin/cdxgen.js -p -r -t github repotests/shiftleft-java-example -o bomresults/bom-github.json diff --git a/README.md b/README.md index ef06e03fb..3ea613445 100644 --- a/README.md +++ b/README.md @@ -170,10 +170,7 @@ Options: faults to true. Pass --no-validate to disable. [boolean] [default: true] --evidence Generate SBOM with evidence for supported languag - es. WIP [boolean] [default: false] - --usages-slices-file Path for the usages slice file created by atom. - --data-flow-slices-file Path for the data-flow slice file created by atom - . + es. [boolean] [default: false] --spec-version CycloneDX Specification version to use. Defaults to 1.5 [default: 1.5] --filter Filter components containining this word in purl. diff --git a/bin/cdxgen.js b/bin/cdxgen.js index fe5c2d8e9..b72843c52 100755 --- a/bin/cdxgen.js +++ b/bin/cdxgen.js @@ -10,10 +10,19 @@ import crypto from "node:crypto"; import { fileURLToPath } from "node:url"; import globalAgent from "global-agent"; import process from "node:process"; -import { printTable, printDependencyTree } from "../display.js"; +import { + printCallStack, + printOccurrences, + printServices, + printReachables, + printTable, + printDependencyTree +} from "../display.js"; import { findUpSync } from "find-up"; import { load as _load } from "js-yaml"; import { postProcess } from "../postgen.js"; +import { analyzeProject, createEvinseFile, prepareDB } from "../evinser.js"; +import { ATOM_DB } from "../utils.js"; // Support for config files const configPath = findUpSync([ @@ -48,7 +57,14 @@ const args = yargs(hideBin(process.argv)) .env("CDXGEN") .option("output", { alias: "o", - description: "Output file for bom.xml or bom.json. Default bom.json" + description: "Output file for bom.xml or bom.json. Default bom.json", + default: "bom.json" + }) + .option("evinse-output", { + description: + "Create bom with evidence as a separate file. Default bom.json", + default: "bom.json", + hidden: true }) .option("type", { alias: "t", @@ -143,16 +159,29 @@ const args = yargs(hideBin(process.argv)) "Validate the generated SBOM using json schema. Defaults to true. Pass --no-validate to disable." }) .option("evidence", { - hidden: true, type: "boolean", default: false, - description: "Generate SBOM with evidence for supported languages. WIP" + description: "Generate SBOM with evidence for supported languages." + }) + .option("deps-slices-file", { + description: "Path for the parsedeps slice file created by atom.", + default: "deps.slices.json", + hidden: true }) .option("usages-slices-file", { - description: "Path for the usages slice file created by atom." + description: "Path for the usages slices file created by atom.", + default: "usages.slices.json", + hidden: true }) .option("data-flow-slices-file", { - description: "Path for the data-flow slice file created by atom." + description: "Path for the data-flow slices file created by atom.", + default: "data-flow.slices.json", + hidden: true + }) + .option("reachables-slices-file", { + description: "Path for the reachables slices file created by atom.", + default: "reachables.slices.json", + hidden: true }) .option("spec-version", { description: "CycloneDX Specification version to use. Defaults to 1.5", @@ -289,9 +318,6 @@ const checkPermissions = (filePath) => { if (options.requiredOnly || options["filter"] || options["only"]) { bomNSData = postProcess(bomNSData, options); } - if (!args.output) { - args.output = "bom.json"; - } if ( args.output && (typeof args.output === "string" || args.output instanceof String) @@ -458,6 +484,34 @@ const checkPermissions = (filePath) => { console.log("Try running the command with -t or -r argument"); } } + // Evidence generation + if (args.evidence) { + const evinseOptions = { + _: args._, + input: options.output, + output: options.evinseOutput, + language: options.projectType || "java", + dbPath: process.env.ATOM_DB || ATOM_DB, + skipMavenCollector: false, + force: false, + withReachables: options.deep, + usagesSlicesFile: options.usagesSlicesFile, + dataFlowSlicesFile: options.dataFlowSlicesFile, + reachablesSlicesFile: options.reachablesSlicesFile + }; + const dbObjMap = await prepareDB(evinseOptions); + if (dbObjMap) { + const sliceArtefacts = await analyzeProject(dbObjMap, evinseOptions); + const evinseJson = createEvinseFile(sliceArtefacts, evinseOptions); + bomNSData.bomJson = evinseJson; + if (args.print && evinseJson) { + printOccurrences(evinseJson); + printCallStack(evinseJson); + printReachables(sliceArtefacts); + printServices(evinseJson); + } + } + } // Perform automatic validation if (args.validate) { if (!validateBom(bomNSData.bomJson)) { diff --git a/bin/evinse.js b/bin/evinse.js index 59d391a4d..9e2595de0 100755 --- a/bin/evinse.js +++ b/bin/evinse.js @@ -3,9 +3,7 @@ // Evinse (Evinse Verification Is Nearly SBOM Evidence) import yargs from "yargs"; import { hideBin } from "yargs/helpers"; -import { join } from "node:path"; import fs from "node:fs"; -import { homedir, platform as _platform } from "node:os"; import process from "node:process"; import { analyzeProject, createEvinseFile, prepareDB } from "../evinser.js"; import { validateBom } from "../validator.js"; @@ -15,6 +13,7 @@ import { printServices, printReachables } from "../display.js"; +import { ATOM_DB } from "../utils.js"; import { findUpSync } from "find-up"; import { load as _load } from "js-yaml"; @@ -38,22 +37,6 @@ if (configPath) { } } -const isWin = _platform() === "win32"; -const isMac = _platform() === "darwin"; -let ATOM_DB = join(homedir(), ".local", "share", ".atomdb"); -if (isWin) { - ATOM_DB = join(homedir(), "AppData", "Local", ".atomdb"); -} else if (isMac) { - ATOM_DB = join(homedir(), "Library", "Application Support", ".atomdb"); -} - -if (!process.env.ATOM_DB && !fs.existsSync(ATOM_DB)) { - try { - fs.mkdirSync(ATOM_DB, { recursive: true }); - } catch (e) { - // ignore - } -} const args = yargs(hideBin(process.argv)) .env("EVINSE") .option("input", { diff --git a/data/frameworks-list.json b/data/frameworks-list.json index b40f4d718..339af6ea1 100644 --- a/data/frameworks-list.json +++ b/data/frameworks-list.json @@ -4,8 +4,8 @@ "System.ServiceModel", "System.Data", "spring", - "flask", - "django", + "pkg:pypi/flask", + "pkg:pypi/django", "beego", "chi", "echo", @@ -30,15 +30,33 @@ "express", "knex", "vue", - "aiohttp", - "bottle", - "cherrypy", - "drt", - "falcon", - "hug", - "pyramid", - "sanic", - "tornado", + "pkg:pypi/aiohttp", + "pkg:pypi/bottle", + "pkg:pypi/cherrypy", + "pkg:pypi/drt", + "pkg:pypi/falcon", + "pkg:pypi/hug", + "pkg:pypi/pyramid", + "pkg:pypi/sanic", + "pkg:pypi/tornado", + "pkg:pypi/fastapi", + "pkg:pypi/pyqt", + "pkg:pypi/tkinter", + "pkg:pypi/kivy", + "pkg:pypi/pyside", + "pkg:pypi/scikit", + "pkg:pypi/tensorflow", + "pkg:pypi/pytorch", + "pkg:pypi/keras", + "pkg:pypi/numpy", + "pkg:pypi/scipy", + "pkg:pypi/pandas", + "pkg:pypi/matplotlib", + "pkg:pypi/google-api-core", + "pkg:pypi/google-cloud", + "pkg:pypi/botocore", + "pkg:pypi/boto3", + "pkg:pypi/azure", "vibora", "koa", "-sdk", diff --git a/data/pypi-pkg-aliases.json b/data/pypi-pkg-aliases.json index 974e0cfc2..11916e16d 100644 --- a/data/pypi-pkg-aliases.json +++ b/data/pypi-pkg-aliases.json @@ -553,6 +553,7 @@ "creole": "python-creole", "creoleparser": "creoleparser", "crispy-forms": "django-crispy-forms", + "crum": "django-crum", "cronlog": "python-crontab", "crontab": "python-crontab", "crypto": "pycryptodome", @@ -589,6 +590,7 @@ "djcelery": "django-celery", "djkombu": "django-kombu", "djorm-pgarray": "djorm-ext-pgarray", + "django-filters": "filters-django", "dns": "dnspython", "docgen": "ansible-docgenerator", "docker": "docker-py", @@ -631,6 +633,7 @@ "fdpexpect": "pexpect", "fedora": "python-fedora", "fias": "ailove-django-fias", + "fieldsignals": "django-fieldsignals", "fiftyone-degrees": "51degrees-mobile-detector", "fiftyonedegrees": "51degrees-mobile-detector-v3-wrapper", "five": "five.customerize", @@ -709,6 +712,7 @@ "igraph": "python-igraph", "imdb": "imdbpy", "impala": "impyla", + "imagekit": "django-imagekit", "impersonate": "django-impersonate", "inmemorystorage": "ambition-inmemorystorage", "ipaddress": "backport-ipaddress", @@ -845,6 +849,7 @@ "path": "path.py", "patricia": "patricia-trie", "paver": "paver", + "packageurl": "packageurl-python", "peak": "proxytypes", "picasso": "anderson.picasso", "picklefield": "django-picklefield", @@ -1057,6 +1062,7 @@ "slugify": "unicode-slugify", "smarkets": "smk-python-sdk", "snappy": "ctypes-snappy", + "social-core": "social-auth-core", "social-django": "social-auth-app-django", "socketio": "python-socketio", "socketserver": "pies2overrides", diff --git a/docs/ADVANCED.md b/docs/ADVANCED.md index 3c11d6ee9..c796d0f68 100644 --- a/docs/ADVANCED.md +++ b/docs/ADVANCED.md @@ -95,7 +95,7 @@ Environment variables override values from the configuration files. ## Evinse Mode / SaaSBOM -Evinse (Evinse Verification Is Nearly SBOM Evidence) is a new command with cdxgen to generate component evidence and SaaSBOM for supported languages. The tool is powered by [atom](https://github.com/AppThreat/atom). +Evinse (Evinse Verification Is Nearly SBOM Evidence) is a new command with cdxgen to generate component evidence and SaaSBOM for supported languages. The tool is powered by [atom](https://github.com/AppThreat/atom). Beginning with cdxgen 9.9.x, cdxgen accepts a new argument called `--evidence` to generate such a comprehensive SBOM. This section is left for users interested in invoking the evinse tool directly for advanced use cases. occurrence evidence @@ -156,18 +156,22 @@ To generate an SBOM with evidence for a java project. evinse -i bom.json -o bom.evinse.json ``` -By default, only occurrence evidences are determined by creating usages slices. To generate callstack evidence, pass either `--with-data-flow` or `--with-reachables`. +By default, only occurrence evidence is determined by creating usages slices. To generate callstack evidence, pass either `--with-data-flow` or `--with-reachables`. #### Reachability-based call stack evidence atom supports reachability-based evidence generation for Java, JavaScript, and TypeScript applications. Reachability refers to data flows that originate from entry points (sources) ending at a sink (which are invocations to external libraries). The technique used is called "Forward-Reachability". -Two necessary prerequisites for this slicing mode are that the input SBOM must be generated with cdxgen and in deep mode (only for java, jars type) and must be placed within the application directory. +Two necessary prerequisites for this slicing mode are that the input SBOM must be generated with cdxgen and in deep mode (only for java, jars, python type) and must be placed within the application directory. ```shell cd -cdxgen -t java --deep -o bom.json . -evinse -i bom.json -o bom.evinse.json -l java --with-reachables . +cdxgen -t java --deep -o bom.json --evidence . +``` + +```shell +cd +cdxgen -t python --deep -o bom.json --evidence . ``` For JavaScript and TypeScript applications, deep mode is optional. @@ -202,6 +206,12 @@ For JavaScript or TypeScript projects, pass `-l javascript`. evinse -i bom.json -o bom.evinse.json --usages-slices-file usages.json --data-flow-slices-file data-flow.json -l javascript --with-data-flow ``` +For Python with cached usages and reachables file. + +```shell +evinse -i bom.json -o bom.evinse.json --usages-slices-file usages.json --reachables-slices-file reachables.json -l python --with-reachables +``` + ## Generate SBOM from maven or gradle cache There could be Java applications with complex dependency requirements. Or you might be interested in cataloging your Maven or gradle cache. @@ -249,8 +259,7 @@ cdxgen -t docker -o bom.json Why not? ```shell -cdxgen -t js -o bom.json -p --no-recurse . -evinse -i bom.json -o bom.evinse.json -l javascript --with-reachables . +cdxgen -t js -o bom.json -p --no-recurse --evidence . # Don't be surprised to see the service endpoint offered by cdxgen. # Review the reachables.slices.json and file any vulnerabilities or bugs! diff --git a/docs/CLI.md b/docs/CLI.md index d9a1d6641..eede934b6 100644 --- a/docs/CLI.md +++ b/docs/CLI.md @@ -108,10 +108,7 @@ Options: faults to true. Pass --no-validate to disable. [boolean] [default: true] --evidence Generate SBOM with evidence for supported languag - es. WIP [boolean] [default: false] - --usages-slices-file Path for the usages slice file created by atom. - --data-flow-slices-file Path for the data-flow slice file created by atom - . + es. [boolean] [default: false] --spec-version CycloneDX Specification version to use. Defaults to 1.5 [default: 1.5] --filter Filter components containining this word in purl. diff --git a/evinser.js b/evinser.js index f7ce97d79..f72d57169 100644 --- a/evinser.js +++ b/evinser.js @@ -7,7 +7,7 @@ import { collectMvnDependencies } from "./utils.js"; import { tmpdir } from "node:os"; -import path from "node:path"; +import path, { basename } from "node:path"; import fs from "node:fs"; import * as db from "./db.js"; import { PackageURL } from "packageurl-js"; @@ -22,6 +22,13 @@ const typePurlsCache = {}; * @param {object} Command line options */ export const prepareDB = async (options) => { + if (!options.dbPath.includes("memory") && !fs.existsSync(options.dbPath)) { + try { + fs.mkdirSync(options.dbPath, { recursive: true }); + } catch (e) { + // ignore + } + } const dirPath = options._[0] || "."; const bomJsonFile = options.input; if (!fs.existsSync(bomJsonFile)) { @@ -54,8 +61,6 @@ export const prepareDB = async (options) => { if ((!usagesSlice && !namespaceSlice) || options.force) { if (comp.purl.startsWith("pkg:maven")) { hasMavenPkgs = true; - } else if (isSlicingRequired(comp.purl)) { - purlsToSlice[comp.purl] = true; } } } @@ -250,17 +255,19 @@ export const initFromSbom = (components) => { const purlLocationMap = {}; const purlImportsMap = {}; for (const comp of components) { - if (!comp || !comp.evidence || !comp.evidence.occurrences) { + if (!comp || !comp.evidence) { continue; } - purlLocationMap[comp.purl] = new Set( - comp.evidence.occurrences.map((v) => v.location) - ); (comp.properties || []) .filter((v) => v.name === "ImportedModules") .forEach((v) => { purlImportsMap[comp.purl] = (v.value || "").split(","); }); + if (comp.evidence.occurrences) { + purlLocationMap[comp.purl] = new Set( + comp.evidence.occurrences.map((v) => v.location) + ); + } } return { purlLocationMap, @@ -412,7 +419,8 @@ export const parseObjectSlices = async ( if ( !slice.fileName || !slice.fileName.trim().length || - slice.fileName === "" + slice.fileName === "" || + slice.fileName === "" ) { continue; } @@ -426,6 +434,7 @@ export const parseObjectSlices = async ( ); detectServicesFromUsages(language, slice, servicesMap); } + detectServicesFromUDT(language, usageSlice.userDefinedTypes, servicesMap); return { purlLocationMap, servicesMap, @@ -475,10 +484,13 @@ export const parseSliceUsages = async ( atype[0] !== false && !isFilterableType(language, userDefinedTypesMap, atype[1]) ) { - if (!atype[1].includes("(")) { + if (!atype[1].includes("(") && !atype[1].includes(".py")) { typesToLookup.add(atype[1]); // Javascript calls can be resolved to a precise line number only from the call nodes - if (language == "javascript" && ausageLine) { + if ( + ["javascript", "js", "ts", "typescript"].includes(language) && + ausageLine + ) { if (atype[1].includes(":")) { typesToLookup.add(atype[1].split("::")[0].replace(/:/g, "/")); } @@ -503,7 +515,10 @@ export const parseSliceUsages = async ( if ( !isFilterableType(language, userDefinedTypesMap, acall?.resolvedMethod) ) { - if (!acall?.resolvedMethod.includes("(")) { + if ( + !acall?.resolvedMethod.includes("(") && + !acall?.resolvedMethod.includes(".py") + ) { typesToLookup.add(acall?.resolvedMethod); // Javascript calls can be resolved to a precise line number only from the call nodes if (acall.lineNumber) { @@ -531,7 +546,7 @@ export const parseSliceUsages = async ( } for (const aparamType of acall?.paramTypes || []) { if (!isFilterableType(language, userDefinedTypesMap, aparamType)) { - if (!aparamType.includes("(")) { + if (!aparamType.includes("(") && !aparamType.includes(".py")) { typesToLookup.add(aparamType); if (acall.lineNumber) { if (aparamType.includes(":")) { @@ -580,16 +595,17 @@ export const parseSliceUsages = async ( } } else { // Check the namespaces db - const nsHits = - typePurlsCache[atype] || - (await dbObjMap.Namespaces.findAll({ + let nsHits = typePurlsCache[atype]; + if (["java", "jar"].includes(language)) { + nsHits = await dbObjMap.Namespaces.findAll({ attributes: ["purl"], where: { data: { [Op.like]: `%${atype}%` } } - })); + }); + } if (nsHits && nsHits.length) { for (const ns of nsHits) { if (!purlLocationMap[ns.purl]) { @@ -612,16 +628,21 @@ export const isFilterableType = ( ) => { if ( !typeFullName || - ["ANY", "UNKNOWN", "VOID"].includes(typeFullName.toUpperCase()) + ["ANY", "UNKNOWN", "VOID", "IMPORT"].includes(typeFullName.toUpperCase()) ) { return true; } - if ( - typeFullName.startsWith("") || @@ -645,13 +666,20 @@ export const isFilterableType = ( typeFullName.startsWith("{ ") || typeFullName.startsWith("JSON") || typeFullName.startsWith("void:") || - typeFullName.startsWith("LAMBDA") || - typeFullName.startsWith("../") || typeFullName.startsWith("node:") ) { return true; } } + if (["python", "py"].includes(language)) { + if ( + typeFullName.startsWith("tmp") || + typeFullName.startsWith("self.") || + typeFullName.startsWith("_") + ) { + return true; + } + } if (userDefinedTypesMap[typeFullName]) { return true; } @@ -715,6 +743,61 @@ export const detectServicesFromUsages = (language, slice, servicesMap = {}) => { } }; +/** + * Method to detect services from user defined types in the usage slice + * + * @param {string} language Application language + * @param {array} userDefinedTypes User defined types + * @param {object} servicesMap Existing service map + */ +export const detectServicesFromUDT = ( + language, + userDefinedTypes, + servicesMap +) => { + if ( + ["python", "py"].includes(language) && + userDefinedTypes && + userDefinedTypes.length + ) { + for (const audt of userDefinedTypes) { + if ( + audt.name.includes("route") || + audt.name.includes("path") || + audt.name.includes("url") + ) { + const fields = audt.fields || []; + if ( + fields.length && + fields[0] && + fields[0].name && + fields[0].name.length > 1 + ) { + const endpoints = extractEndpoints(language, fields[0].name); + let serviceName = "service"; + if (audt.fileName) { + serviceName = `${basename( + audt.fileName.replace(".py", "") + )}-service`; + } + if (!servicesMap[serviceName]) { + servicesMap[serviceName] = { + endpoints: new Set(), + authenticated: false, + xTrustBoundary: undefined + }; + } + if (endpoints) { + for (const endpoint of endpoints) { + servicesMap[serviceName].endpoints.add(endpoint); + } + } + } + } + } + } +}; + export const constructServiceName = (language, slice) => { let serviceName = "service"; if (slice?.fullName) { @@ -753,7 +836,10 @@ export const extractEndpoints = (language, code) => { ); } break; + case "js": + case "ts": case "javascript": + case "typescript": if (code.includes("app.") || code.includes("route")) { const matches = code.match(/['"](.*?)['"]/gi) || []; endpoints = matches @@ -769,24 +855,18 @@ export const extractEndpoints = (language, code) => { ); } break; + case "py": + case "python": + endpoints = (code.match(/['"](.*?)['"]/gi) || []) + .map((v) => v.replace(/["']/g, "").replace("\n", "")) + .filter((v) => v.length > 2); + break; default: break; } return endpoints; }; -/** - * Function to determine if slicing is required for the given language's dependencies. - * For performance reasons, we make java operate only with namespaces - * - * @param {string} purl - * @returns - */ -export const isSlicingRequired = (purl) => { - const language = purlToLanguage(purl); - return ["python"].includes(language); -}; - /** * Method to create the SBOM with evidence file called evinse file. * @@ -945,7 +1025,10 @@ export const collectDataFlowFrames = async ( continue; } let typeFullName = theNode.typeFullName; - if (language === "javascript" && typeFullName == "ANY") { + if ( + ["javascript", "js", "ts", "typescript"].includes(language) && + typeFullName == "ANY" + ) { if ( theNode.code && (theNode.code.startsWith("new ") || @@ -971,16 +1054,17 @@ export const collectDataFlowFrames = async ( } } else { // Check the namespaces db - const nsHits = - typePurlsCache[typeFullName] || - (await dbObjMap.Namespaces.findAll({ + let nsHits = typePurlsCache[typeFullName]; + if (["java", "jar"].includes(language)) { + nsHits = await dbObjMap.Namespaces.findAll({ attributes: ["purl"], where: { data: { [Op.like]: `%${typeFullName}%` } } - })); + }); + } if (nsHits && nsHits.length) { for (const ns of nsHits) { referredPurls.add(ns.purl); @@ -1099,7 +1183,7 @@ export const getClassTypeFromSignature = (language, typeFullName) => { const tmpA = typeFullName.split("."); tmpA.pop(); typeFullName = tmpA.join("."); - } else if (language === "javascript") { + } else if (["javascript", "js", "ts", "typescript"].includes(language)) { typeFullName = typeFullName.replace("new: ", "").replace("await ", ""); if (typeFullName.includes(":")) { const tmpA = typeFullName.split("::")[0].replace(/:/g, "/").split("/"); @@ -1108,6 +1192,15 @@ export const getClassTypeFromSignature = (language, typeFullName) => { } typeFullName = tmpA.join("/"); } + } else if (["python", "py"].includes(language)) { + typeFullName = typeFullName + .replace(".py:", "") + .replace(/\//g, ".") + .replace(".", "") + .replace(".", "") + .replace(".", "") + .replace(".__iter__", "") + .replace(".__init__", ""); } if ( typeFullName.startsWith(" { // Get the imported modules and a dedupe list of packages const parentDependsOn = new Set(); const retMap = await getPyModules(path, pkgList, options); + // We need to patch the existing package list to add ImportedModules for evinse to work + if (retMap.modList && retMap.modList.length) { + const iSymbolsMap = {}; + retMap.modList.forEach((v) => { + iSymbolsMap[v.name] = v.importedSymbols; + iSymbolsMap[v.name.replace(/_/g, "-")] = v.importedSymbols; + }); + for (const apkg of pkgList) { + if (iSymbolsMap[apkg.name]) { + apkg.properties = apkg.properties || []; + apkg.properties.push({ + name: "ImportedModules", + value: iSymbolsMap[apkg.name] + }); + } + } + } if (retMap.pkgList && retMap.pkgList.length) { pkgList = pkgList.concat(retMap.pkgList); for (const p of retMap.pkgList) { diff --git a/package-lock.json b/package-lock.json index d2e039330..8e22d046e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@cyclonedx/cdxgen", - "version": "9.9.0", + "version": "9.9.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@cyclonedx/cdxgen", - "version": "9.9.0", + "version": "9.9.1", "license": "Apache-2.0", "dependencies": { "@babel/parser": "^7.23.0", @@ -56,7 +56,7 @@ "node": ">=16" }, "optionalDependencies": { - "@appthreat/atom": "1.5.2", + "@appthreat/atom": "1.5.4", "@cyclonedx/cdxgen-plugins-bin": "^1.4.0", "@cyclonedx/cdxgen-plugins-bin-arm64": "^1.4.0", "@cyclonedx/cdxgen-plugins-bin-ppc64": "^1.4.0", @@ -91,9 +91,9 @@ } }, "node_modules/@appthreat/atom": { - "version": "1.5.2", - "resolved": "https://registry.npmjs.org/@appthreat/atom/-/atom-1.5.2.tgz", - "integrity": "sha512-PhL/xuuFB10+R7ErNEPgMdmoQX8B0RAQHIH3CPXe95PfiGKYh3NUea0Wt+/qjBnUxRofRVlRdns0s25sn0bsGQ==", + "version": "1.5.4", + "resolved": "https://registry.npmjs.org/@appthreat/atom/-/atom-1.5.4.tgz", + "integrity": "sha512-lpGjNgFOBwU/N2X2IuwupBww9vMW6NJUtN+Q1+1WfJFxhoxDSSHPlC2hILtCiNmTU4jmcVhrFP5S4x4bcktpFQ==", "optional": true, "dependencies": { "@babel/parser": "^7.23.0", diff --git a/package.json b/package.json index f8c7bd05d..dbd29065b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@cyclonedx/cdxgen", - "version": "9.9.0", + "version": "9.9.1", "description": "Creates CycloneDX Software Bill of Materials (SBOM) from source or container image", "homepage": "http://github.com/cyclonedx/cdxgen", "author": "Prabhu Subramanian ", @@ -83,7 +83,7 @@ "yargs": "^17.7.2" }, "optionalDependencies": { - "@appthreat/atom": "1.5.2", + "@appthreat/atom": "1.5.4", "@cyclonedx/cdxgen-plugins-bin": "^1.4.0", "@cyclonedx/cdxgen-plugins-bin-arm64": "^1.4.0", "@cyclonedx/cdxgen-plugins-bin-ppc64": "^1.4.0", diff --git a/utils.js b/utils.js index da33de703..3d26208f9 100644 --- a/utils.js +++ b/utils.js @@ -51,6 +51,13 @@ if (!url.startsWith("file://")) { } const dirNameStr = import.meta ? dirname(fileURLToPath(url)) : __dirname; const isWin = platform() === "win32"; +const isMac = platform() === "darwin"; +export let ATOM_DB = join(homedir(), ".local", "share", ".atomdb"); +if (isWin) { + ATOM_DB = join(homedir(), "AppData", "Local", ".atomdb"); +} else if (isMac) { + ATOM_DB = join(homedir(), "Library", "Application Support", ".atomdb"); +} const licenseMapping = JSON.parse( readFileSync(join(dirNameStr, "data", "lic-mapping.json")) @@ -116,6 +123,8 @@ const MAX_LICENSE_ID_LENGTH = 100; let PYTHON_CMD = "python"; if (process.env.PYTHON_CMD) { PYTHON_CMD = process.env.PYTHON_CMD; +} else if (process.env.CONDA_PYTHON_EXE) { + PYTHON_CMD = process.env.CONDA_PYTHON_EXE; } // Custom user-agent for cdxgen @@ -2426,13 +2435,23 @@ export const getPyMetadata = async function (pkgList, fetchDepsInfo) { cdepList.push(p); continue; } + const origName = p.name; // Some packages support extra modules if (p.name.includes("[")) { p.name = p.name.split("[")[0]; } - const res = await cdxgenAgent.get(PYPI_URL + p.name + "/json", { - responseType: "json" - }); + let res = undefined; + try { + res = await cdxgenAgent.get(PYPI_URL + p.name + "/json", { + responseType: "json" + }); + } catch (err) { + // retry by prefixing django- to the package name + res = await cdxgenAgent.get(PYPI_URL + "django-" + p.name + "/json", { + responseType: "json" + }); + p.name = "django-" + p.name; + } const body = res.body; if (body.info.author && body.info.author.trim() !== "") { if (body.info.author_email && body.info.author_email.trim() !== "") { @@ -2538,6 +2557,10 @@ export const getPyMetadata = async function (pkgList, fetchDepsInfo) { name: "cdx:pypi:latest_version", value: body.info.version }); + p.properties.push({ + name: "cdx:pypi:resolved_from", + value: origName + }); } if ( body.releases && @@ -2551,12 +2574,22 @@ export const getPyMetadata = async function (pkgList, fetchDepsInfo) { p._integrity = "md5-" + digest["md5"]; } } + const purlString = new PackageURL( + "pypi", + "", + p.name, + p.version, + null, + null + ).toString(); + p.purl = purlString; + p["bom-ref"] = decodeURIComponent(purlString); cdepList.push(p); } catch (err) { if (DEBUG_MODE) { console.error(p.name, "is not found on PyPI."); console.log( - "If this package is available from PyPI or a registry, its name might be different to the module name. Raise a ticket at https://github.com/CycloneDX/cdxgen/issues so that this could be added to the mapping file pypi-pkg-aliases.json" + "If this package is available from PyPI or a registry, its name might be different from the module name. Raise a ticket at https://github.com/CycloneDX/cdxgen/issues so that this can be added to the mapping file pypi-pkg-aliases.json" ); console.log( "Alternatively, if this is a package that gets installed directly in your environment and offers a python binding, then track such packages manually." @@ -2584,6 +2617,16 @@ export const getPyMetadata = async function (pkgList, fetchDepsInfo) { } }; } + const purlString = new PackageURL( + "pypi", + "", + p.name, + p.version, + null, + null + ).toString(); + p.purl = purlString; + p["bom-ref"] = decodeURIComponent(purlString); cdepList.push(p); } } @@ -2973,35 +3016,29 @@ export const getPyModules = async (src, epkgList, options) => { const allImports = {}; const dependenciesList = []; let modList = []; + const slicesFile = resolve( + options.depsSlicesFile || options.usagesSlicesFile + ); // Issue: 615 fix. Reuse existing slices file - // FIXME: The argument is called usagesSlicesFile while the atom command used is parsedeps. - // This logic could be rewritten while implementing evinse for python to that the analysis works for either type of slice - if (options.usagesSlicesFile && existsSync(options.usagesSlicesFile)) { - const slicesData = JSON.parse( - readFileSync(options.usagesSlicesFile, "utf-8") - ); + if (slicesFile && existsSync(slicesFile)) { + const slicesData = JSON.parse(readFileSync(slicesFile, "utf-8")); if (slicesData && Object.keys(slicesData) && slicesData.modules) { modList = slicesData.modules; } else { modList = slicesData; } } else { - modList = findAppModules( - src, - "python", - "parsedeps", - options.usagesSlicesFile - ); + modList = findAppModules(src, "python", "parsedeps", slicesFile); } const pyDefaultModules = new Set(PYTHON_STD_MODULES); - const filteredModList = modList.filter( + modList = modList.filter( (x) => !pyDefaultModules.has(x.name.toLowerCase()) && !x.name.startsWith("_") && !x.name.startsWith(".") ); - let pkgList = filteredModList.map((p) => { - return { + let pkgList = modList.map((p) => { + const apkg = { name: PYPI_MODULE_PACKAGE_MAPPING[p.name.toLowerCase()] || PYPI_MODULE_PACKAGE_MAPPING[p.name.replace(/_/g, "-").toLowerCase()] || @@ -3015,6 +3052,13 @@ export const getPyModules = async (src, epkgList, options) => { } ] }; + if (p.importedSymbols) { + apkg.properties.push({ + name: "ImportedModules", + value: p.importedSymbols + }); + } + return apkg; }); pkgList = pkgList.filter( (obj, index) => pkgList.findIndex((i) => i.name === obj.name) === index @@ -3038,7 +3082,7 @@ export const getPyModules = async (src, epkgList, options) => { }); } } - return { allImports, pkgList, dependenciesList }; + return { allImports, pkgList, dependenciesList, modList }; }; /** @@ -6862,9 +6906,19 @@ const flattenDeps = (dependenciesMap, pkgList, reqOrSetupFile, t) => { if (!dependenciesMap[pkgRef]) { dependenciesMap[pkgRef] = []; } + const purlString = new PackageURL( + "pypi", + "", + d.name, + d.version, + null, + null + ).toString(); pkgList.push({ name: d.name, version: d.version, + purl: purlString, + "bom-ref": decodeURIComponent(purlString), properties: [ { name: "SrcFile", @@ -6874,11 +6928,11 @@ const flattenDeps = (dependenciesMap, pkgList, reqOrSetupFile, t) => { evidence: { identity: { field: "purl", - confidence: 1, + confidence: 0.8, methods: [ { technique: "manifest-analysis", - confidence: 1, + confidence: 0.8, value: reqOrSetupFile } ] @@ -6918,6 +6972,7 @@ export const getPipFrozenTree = (basePath, reqOrSetupFile, tempVenvDir) => { */ if ( !process.env.VIRTUAL_ENV && + !process.env.CONDA_PREFIX && reqOrSetupFile && !reqOrSetupFile.endsWith("poetry.lock") ) { @@ -7106,7 +7161,10 @@ export const getPipFrozenTree = (basePath, reqOrSetupFile, tempVenvDir) => { } } // Bug #375. Attempt pip freeze on existing and new virtual environments - if (env.VIRTUAL_ENV && env.VIRTUAL_ENV.length) { + if ( + (env.VIRTUAL_ENV && env.VIRTUAL_ENV.length) || + (env.CONDA_PREFIX && env.CONDA_PREFIX.length) + ) { /** * At this point, the previous attempt to do a pip install might have failed and we might have an unclean virtual environment with an incomplete list * The position taken by cdxgen is "Some SBOM is better than no SBOM", so we proceed to collecting the dependencies that got installed with pip freeze @@ -7136,9 +7194,19 @@ export const getPipFrozenTree = (basePath, reqOrSetupFile, tempVenvDir) => { const version = t.version; let exclude = ["pip", "setuptools", "wheel"]; if (!exclude.includes(name)) { + const purlString = new PackageURL( + "pypi", + "", + name, + version, + null, + null + ).toString(); pkgList.push({ name, version, + purl: purlString, + "bom-ref": decodeURIComponent(purlString), evidence: { identity: { field: "purl", @@ -7147,7 +7215,7 @@ export const getPipFrozenTree = (basePath, reqOrSetupFile, tempVenvDir) => { { technique: "instrumentation", confidence: 1, - value: env.VIRTUAL_ENV + value: env.VIRTUAL_ENV || env.CONDA_PREFIX } ] } diff --git a/utils.test.js b/utils.test.js index eb86b8941..93b04f5f8 100644 --- a/utils.test.js +++ b/utils.test.js @@ -1192,7 +1192,7 @@ test("parse github actions workflow data", async () => { dep_list = parseGitHubWorkflowData( readFileSync("./.github/workflows/repotests.yml", { encoding: "utf-8" }) ); - expect(dep_list.length).toEqual(7); + expect(dep_list.length).toEqual(8); expect(dep_list[0]).toEqual({ group: "actions", name: "checkout",