diff --git a/.github/workflows/repotests.yml b/.github/workflows/repotests.yml
index 4de362d57e..f9bf7da4cd 100644
--- a/.github/workflows/repotests.yml
+++ b/.github/workflows/repotests.yml
@@ -155,6 +155,8 @@ jobs:
           bin/cdxgen.js -p -t java repotests/java-sec-code -o bomresults/bom-java-sec-code.json --required-only
           bin/cdxgen.js -p -t java repotests/java-sec-code -o bomresults/bom-java-sec-code.json --filter postgres --filter json
           bin/cdxgen.js -p -t java repotests/java-sec-code -o bomresults/bom-java-sec-code.json --only spring
+          bin/cdxgen.js -p -t java repotests/java-sec-code -o repotests/java-sec-code/bom.json --deep
+          node bin/evinse.js -i repotests/java-sec-code/bom.json -o bomresults/java-sec-code.evinse.json -l java --with-reachables -p repotests/java-sec-code
           bin/cdxgen.js -p -r -t java repotests/shiftleft-java-example -o bomresults/bom-java.json --generate-key-and-sign
           node bin/evinse.js -i bomresults/bom-java.json -o bomresults/bom-java.evinse.json -l java --with-data-flow -p repotests/shiftleft-java-example
           SBOM_SIGN_ALGORITHM=RS512 SBOM_SIGN_PRIVATE_KEY=bomresults/private.key SBOM_SIGN_PUBLIC_KEY=bomresults/public.key bin/cdxgen.js -p -r -t github repotests/shiftleft-java-example -o bomresults/bom-github.json
diff --git a/README.md b/README.md
index d938e6d419..b9fb8682b8 100644
--- a/README.md
+++ b/README.md
@@ -377,6 +377,7 @@ cdxgen can retain the dependency tree under the `dependencies` attribute for a s
 | CDX_MAVEN_INCLUDE_TEST_SCOPE | Whether test scoped dependencies should be included from Maven projects, Default: true |
 | ASTGEN_IGNORE_DIRS | Comma separated list of directories to ignore while analyzing using babel. The environment variable is also used by atom and astgen. |
 | ASTGEN_IGNORE_FILE_PATTERN | Ignore regex to use |
+| PYPI_URL | Override pypi url. Default: https://pypi.org/pypi/ |
 
 ## Plugins
diff --git a/bin/evinse.js b/bin/evinse.js
index 88897bb2c8..2cc3d2192d 100755
--- a/bin/evinse.js
+++ b/bin/evinse.js
@@ -98,6 +98,12 @@ const args = yargs(hideBin(process.argv))
     default: false,
     type: "boolean"
   })
+  .option("with-reachables", {
+    description:
+      "Enable auto-tagged reachable slicing. Requires SBOM generated with --deep mode.",
+    default: false,
+    type: "boolean"
+  })
   .option("usages-slices-file", {
     description: "Use an existing usages slices file.",
     default: "usages.slices.json"
   })
@@ -106,6 +112,10 @@ const args = yargs(hideBin(process.argv))
     description: "Use an existing data-flow slices file.",
     default: "data-flow.slices.json"
   })
+  .option("reachables-slices-file", {
+    description: "Use an existing reachables slices file.",
+    default: "reachables.slices.json"
+  })
   .option("print", {
     alias: "p",
     type: "boolean",
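
A minimal end-to-end invocation of the two new options, mirroring the workflow step above (paths are illustrative; the flags are the ones introduced in this diff):

```shell
cdxgen -t java --deep -o /path/to/app/bom.json /path/to/app
evinse -i /path/to/app/bom.json -o bom.evinse.json -l java --with-reachables /path/to/app
```
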
diff --git a/binary.js b/binary.js
index d80555d059..84b5c60e45 100644
--- a/binary.js
+++ b/binary.js
@@ -1,5 +1,11 @@
-import { platform as _platform, arch as _arch, tmpdir } from "node:os";
-import { existsSync, mkdtempSync, readFileSync, rmSync } from "node:fs";
+import { platform as _platform, arch as _arch, tmpdir, homedir } from "node:os";
+import {
+  existsSync,
+  mkdirSync,
+  mkdtempSync,
+  readFileSync,
+  rmSync
+} from "node:fs";
 import { join, dirname, basename } from "node:path";
 import { spawnSync } from "node:child_process";
 import { PackageURL } from "packageurl-js";
@@ -284,6 +290,13 @@ export const getOSPackages = (src) => {
   const allTypes = new Set();
   if (TRIVY_BIN) {
     let imageType = "image";
+    const trivyCacheDir = join(homedir(), ".cache", "trivy");
+    try {
+      mkdirSync(join(trivyCacheDir, "db"), { recursive: true });
+      mkdirSync(join(trivyCacheDir, "java-db"), { recursive: true });
+    } catch (err) {
+      // ignore errors
+    }
     if (existsSync(src)) {
       imageType = "rootfs";
     }
@@ -292,12 +305,17 @@ export const getOSPackages = (src) => {
     const args = [
       imageType,
       "--skip-db-update",
+      "--skip-java-db-update",
       "--offline-scan",
+      "--skip-files",
+      "**/*.jar",
       "--no-progress",
       "--exit-code",
       "0",
       "--format",
       "cyclonedx",
+      "--cache-dir",
+      trivyCacheDir,
       "--output",
       bomJsonFile
     ];
diff --git a/docker.js b/docker.js
index 79e9a62b13..5fe2c7ea68 100644
--- a/docker.js
+++ b/docker.js
@@ -333,7 +333,12 @@ export const parseImageName = (fullImageName) => {
  */
 export const getImage = async (fullImageName) => {
   let localData = undefined;
+  let pullData = undefined;
   const { repo, tag, digest } = parseImageName(fullImageName);
+  let repoWithTag = `${repo}:${tag !== "" ? tag : "latest"}`;
+  if (repoWithTag.startsWith("library/")) {
+    repoWithTag = repoWithTag.replace("library/", "");
+  }
   // Fetch only the latest tag if none is specified
   if (tag === "" && digest === "") {
     fullImageName = fullImageName + ":latest";
@@ -379,6 +384,14 @@ export const getImage = async (fullImageName) => {
       }
     }
   }
+  try {
+    localData = await makeRequest(`images/${repoWithTag}/json`);
+    if (localData) {
+      return localData;
+    }
+  } catch (err) {
+    // ignore
+  }
   try {
     localData = await makeRequest(`images/${repo}/json`);
   } catch (err) {
@@ -397,7 +410,7 @@ export const getImage = async (fullImageName) => {
   }
   // If the data is not available locally
   try {
-    const pullData = await makeRequest(
+    pullData = await makeRequest(
       `images/create?fromImage=${fullImageName}`,
       "POST"
     );
@@ -415,15 +428,42 @@ export const getImage = async (fullImageName) => {
       return undefined;
     }
   } catch (err) {
-    // continue regardless of error
+    try {
+      if (DEBUG_MODE) {
+        console.log(`Re-trying the pull with the name ${repoWithTag}.`);
+      }
+      pullData = await makeRequest(
+        `images/create?fromImage=${repoWithTag}`,
+        "POST"
+      );
+    } catch (err) {
+      // continue regardless of error
+    }
   }
   try {
     if (DEBUG_MODE) {
-      console.log(`Trying with ${repo}`);
+      console.log(`Trying with ${repoWithTag}`);
+    }
+    localData = await makeRequest(`images/${repoWithTag}/json`);
+    if (localData) {
+      return localData;
     }
-    localData = await makeRequest(`images/${repo}/json`);
   } catch (err) {
     try {
+      if (DEBUG_MODE) {
+        console.log(`Trying with ${repo}`);
+      }
+      localData = await makeRequest(`images/${repo}/json`);
+      if (localData) {
+        return localData;
+      }
+    } catch (err) {
+      // continue regardless of error
+    }
+    try {
+      if (DEBUG_MODE) {
+        console.log(`Trying with ${fullImageName}`);
+      }
       localData = await makeRequest(`images/${fullImageName}/json`);
     } catch (err) {
       // continue regardless of error
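
The image-name normalization above is easiest to see with concrete values. A small sketch, assuming `parseImageName` returns a `library/`-prefixed repo and an empty tag for a Docker Hub short name such as `ubuntu`:

```js
// Hypothetical return value of parseImageName("ubuntu"), for illustration only
const { repo, tag } = { repo: "library/ubuntu", tag: "" };
let repoWithTag = `${repo}:${tag !== "" ? tag : "latest"}`; // "library/ubuntu:latest"
if (repoWithTag.startsWith("library/")) {
  repoWithTag = repoWithTag.replace("library/", ""); // "ubuntu:latest"
}
```

Note the fallback literal is `"latest"`, not `":latest"`; the template already supplies the colon, so the latter would produce `library/ubuntu::latest`.
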
diff --git a/docs/ADVANCED.md b/docs/ADVANCED.md
index f35db074ab..330e8a3334 100644
--- a/docs/ADVANCED.md
+++ b/docs/ADVANCED.md
@@ -133,13 +133,18 @@ Options:
                                 directory. Useful to improve the recall for
                                 callstack evidence. [boolean] [default: false]
   --annotate                    Include contents of atom slices as annotations
-                                [boolean] [default: true]
+                                [boolean] [default: false]
   --with-data-flow              Enable inter-procedural data-flow slicing.
                                 [boolean] [default: false]
+  --with-reachables             Enable auto-tagged reachable slicing. Requires
+                                SBOM generated with --deep mode.
+                                [boolean] [default: false]
   --usages-slices-file          Use an existing usages slices file.
                                 [default: "usages.slices.json"]
   --data-flow-slices-file       Use an existing data-flow slices file.
                                 [default: "data-flow.slices.json"]
+  --reachables-slices-file      Use an existing reachables slices file.
+                                [default: "reachables.slices.json"]
   -p, --print                   Print the evidences as table [boolean]
   --version                     Show version number [boolean]
   -h                            Show help [boolean]
@@ -151,18 +156,38 @@ To generate an SBOM with evidence for a java project.
 evinse -i bom.json -o bom.evinse.json
 ```
 
-By default, only occurrence evidences are determined by creating usages slices. To generate callstack evidence, pass `--with-data-flow`
+By default, only occurrence evidence is determined by creating usages slices. To generate callstack evidence, pass either `--with-data-flow` or `--with-reachables`.
+
+#### Reachability-based callstack evidence
+
+atom supports reachability-based slicing for Java applications. Two prerequisites for this slicing mode: the input SBOM must be generated in deep mode (with the --deep argument), and it must be placed within the application directory.
+
+```shell
+cd <application-directory>
+cdxgen -t java --deep -o bom.json .
+evinse -i bom.json -o bom.evinse.json --with-reachables .
+```
+
+This is because atom auto-tags the reachable flows using the package purls recorded in the deep SBOM found within the application directory.
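+
+The reachables slices file produced by the first run can be reused on subsequent runs. A minimal sketch, using the default file name from `--reachables-slices-file`:
+
+```shell
+evinse -i bom.json -o bom.evinse.json --with-reachables --reachables-slices-file reachables.slices.json .
+```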
+
+#### Data-flow-based slicing
+
+Often, reachability cannot be computed reliably due to the presence of wrapper libraries or mitigating layers. In such cases, data-flow-based slicing can be used to compute the callstack with a reverse-reachability algorithm. This is, however, a time- and resource-consuming operation and might even require atom to be run externally in [java mode](https://cyclonedx.github.io/cdxgen/#/ADVANCED?id=use-atom-in-java-mode).
 
 ```shell
 evinse -i bom.json -o bom.evinse.json --with-data-flow
 ```
 
+#### Performance tuning
+
 To improve performance, you can cache the generated usages and data-flow slices file along with the bom file.
 
 ```shell
 evinse -i bom.json -o bom.evinse.json --usages-slices-file usages.json --data-flow-slices-file data-flow.json --with-data-flow
 ```
 
+#### Other languages
+
 For JavaScript or TypeScript projects, pass `-l javascript`.
 
 ```shell
diff --git a/docs/ENV.md b/docs/ENV.md
index b0f1f8ae6c..4af6a250a2 100644
--- a/docs/ENV.md
+++ b/docs/ENV.md
@@ -36,3 +36,4 @@ The following environment variables are available to configure the bom generatio
 | CDX_MAVEN_INCLUDE_TEST_SCOPE | Whether test scoped dependencies should be included from Maven projects, Default: true |
 | ASTGEN_IGNORE_DIRS | Comma separated list of directories to ignore while analyzing using babel. The environment variable is also used by atom and astgen. |
 | ASTGEN_IGNORE_FILE_PATTERN | Ignore regex to use |
+| PYPI_URL | Override pypi url. Default: https://pypi.org/pypi/ |
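
Since `PYPI_URL` is read directly by `getPyMetadata` (see the utils.js change below), a private registry mirror can be supplied per invocation. A sketch; the mirror URL here is made up:

```shell
PYPI_URL=https://pypi.example.internal/pypi/ cdxgen -t python -o bom.json .
```
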
diff --git a/evinser.js b/evinser.js
index d663b8f21f..87a7518cef 100644
--- a/evinser.js
+++ b/evinser.js
@@ -260,8 +260,10 @@ export const analyzeProject = async (dbObjMap, options) => {
   const language = options.language;
   let usageSlice = undefined;
   let dataFlowSlice = undefined;
+  let reachablesSlice = undefined;
   let usagesSlicesFile = undefined;
   let dataFlowSlicesFile = undefined;
+  let reachablesSlicesFile = undefined;
   let dataFlowFrames = {};
   let servicesMap = {};
   let retMap = {};
@@ -330,10 +332,36 @@ export const analyzeProject = async (dbObjMap, options) => {
       purlImportsMap
     );
   }
+  if (options.withReachables) {
+    if (
+      options.reachablesSlicesFile &&
+      fs.existsSync(options.reachablesSlicesFile)
+    ) {
+      reachablesSlicesFile = options.reachablesSlicesFile;
+      reachablesSlice = JSON.parse(
+        fs.readFileSync(options.reachablesSlicesFile, "utf-8")
+      );
+    } else {
+      retMap = createSlice(language, dirPath, "reachables");
+      if (retMap && retMap.slicesFile && fs.existsSync(retMap.slicesFile)) {
+        reachablesSlicesFile = retMap.slicesFile;
+        reachablesSlice = JSON.parse(
+          fs.readFileSync(retMap.slicesFile, "utf-8")
+        );
+        console.log(
+          `To speed up this step, cache ${reachablesSlicesFile} and invoke evinse with the --reachables-slices-file argument.`
+        );
+      }
+    }
+  }
+  if (reachablesSlice && Object.keys(reachablesSlice).length) {
+    dataFlowFrames = await collectReachableFrames(language, reachablesSlice);
+  }
   return {
     atomFile: retMap.atomFile,
     usagesSlicesFile,
     dataFlowSlicesFile,
+    reachablesSlicesFile,
     purlLocationMap,
     servicesMap,
     dataFlowFrames,
@@ -752,6 +780,7 @@ export const createEvinseFile = (sliceArtefacts, options) => {
     tempDir,
     usagesSlicesFile,
     dataFlowSlicesFile,
+    reachablesSlicesFile,
     purlLocationMap,
     servicesMap,
     dataFlowFrames
@@ -830,6 +859,14 @@ export const createEvinseFile = (sliceArtefacts, options) => {
         text: fs.readFileSync(dataFlowSlicesFile, "utf8")
       });
     }
+    if (reachablesSlicesFile && fs.existsSync(reachablesSlicesFile)) {
+      bomJson.annotations.push({
+        subjects: [bomJson.serialNumber],
+        annotator: { component: bomJson.metadata.tools.components[0] },
+        timestamp: new Date().toISOString(),
+        text: fs.readFileSync(reachablesSlicesFile, "utf8")
+      });
+    }
   }
   // Increment the version
   bomJson.version = (bomJson.version || 1) + 1;
@@ -973,6 +1010,46 @@ export const collectDataFlowFrames = async (
   return dfFrames;
 };
 
+/**
+ * Method to convert a reachables slice into usable callstack frames.
+ * Implemented based on the logic proposed here - https://github.com/AppThreat/atom/blob/main/specification/docs/slices.md#data-flow-slice
+ *
+ * @param {string} language Application language
+ * @param {object} reachablesSlice Reachables slice object from atom
+ */
+export const collectReachableFrames = async (language, reachablesSlice) => {
+  const reachableNodes = reachablesSlice?.reachables || [];
+  // Map of purl key to an array of frame arrays.
+  // CycloneDX 1.5 currently accepts only one frame as evidence,
+  // so keeping arrays of frames makes this method more future-proof.
+  const dfFrames = {};
+  for (const anode of reachableNodes) {
+    let aframe = [];
+    let referredPurls = new Set(anode.purls || []);
+    for (const fnode of anode.flows) {
+      aframe.push({
+        package: fnode.parentPackageName,
+        module: fnode.parentClassName || "",
+        function: fnode.parentMethodName || "",
+        line: fnode.lineNumber || undefined,
+        column: fnode.columnNumber || undefined,
+        fullFilename: fnode.parentFileName || ""
+      });
+    }
+    referredPurls = Array.from(referredPurls);
+    if (referredPurls.length) {
+      for (const apurl of referredPurls) {
+        if (!dfFrames[apurl]) {
+          dfFrames[apurl] = [];
+        }
+        // Store this frame as evidence for this purl
+        dfFrames[apurl].push(aframe);
+      }
+    }
+  }
+  return dfFrames;
+};
+
 /**
  * Method to pick a callstack frame as an evidence. This method is required since CycloneDX 1.5 accepts only a single frame as evidence.
  *
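
The fields accessed by `collectReachableFrames` above imply a reachables slice of roughly the following shape; every value here is invented for illustration:

```js
// Hypothetical reachables slice, inferred from the fields read above
const reachablesSlice = {
  reachables: [
    {
      // purls of the third-party packages this flow reaches
      purls: ["pkg:maven/org.example/library@1.0.0"],
      // one frame is built per flow node
      flows: [
        {
          parentPackageName: "org.example.app",
          parentClassName: "Router",
          parentMethodName: "handle",
          lineNumber: 42,
          columnNumber: 8,
          parentFileName: "Router.java"
        }
      ]
    }
  ]
};
```
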
diff --git a/index.js b/index.js
index 8ddde63e16..995c24b739 100644
--- a/index.js
+++ b/index.js
@@ -1053,7 +1053,7 @@ const buildBomNSData = (options, pkgInfo, ptype, context) => {
  * @param path to the project
  * @param options Parse options from the cli
  */
-export const createJarBom = (path, options) => {
+export const createJarBom = async (path, options) => {
   let pkgList = [];
   let jarFiles = [];
   let nsMapping = {};
@@ -1094,6 +1094,9 @@ export const createJarBom = (path, options) => {
     if (dlist && dlist.length) {
       pkgList = pkgList.concat(dlist);
     }
+    if (pkgList.length) {
+      pkgList = await getMvnMetadata(pkgList);
+    }
   }
   // Clean up
   if (tempDir && tempDir.startsWith(tmpdir()) && rmSync) {
@@ -1757,11 +1760,11 @@ export const createJavaBom = async (path, options) => {
       // write to the existing plugins file
       if (useSlashSyntax) {
         sbtArgs = [
-          `'set asciiGraphWidth := 400' "dependencyTree / toFile ${dlFile} --force"`
+          `'set ThisBuild / asciiGraphWidth := 400' "dependencyTree / toFile ${dlFile} --force"`
         ];
       } else {
         sbtArgs = [
-          `'set asciiGraphWidth := 400' "dependencyTree::toFile ${dlFile} --force"`
+          `'set asciiGraphWidth in ThisBuild := 400' "dependencyTree::toFile ${dlFile} --force"`
         ];
       }
       pluginFile = addPlugin(basePath, sbtPluginDefinition);
@@ -4729,7 +4732,7 @@ export const createMultiXBom = async (pathList, options) => {
     }
     // Jar scanning is enabled by default
     // See #330
-    bomData = createJarBom(path, options);
+    bomData = await createJarBom(path, options);
     if (
       bomData &&
       bomData.bomJson &&
@@ -4755,7 +4758,7 @@ export const createMultiXBom = async (pathList, options) => {
     }
   } // for
   if (options.lastWorkingDir && options.lastWorkingDir !== "") {
-    bomData = createJarBom(options.lastWorkingDir, options);
+    bomData = await createJarBom(options.lastWorkingDir, options);
     if (
       bomData &&
       bomData.bomJson &&
@@ -5229,20 +5232,20 @@ export const createBom = async (path, options) => {
     case "sbt":
       return await createJavaBom(path, options);
     case "jar":
-      return createJarBom(path, options);
+      return await createJarBom(path, options);
     case "gradle-index":
     case "gradle-cache":
       options.useGradleCache = true;
-      return createJarBom(GRADLE_CACHE_DIR, options);
+      return await createJarBom(GRADLE_CACHE_DIR, options);
     case "sbt-index":
     case "sbt-cache":
       options.useSbtCache = true;
-      return createJarBom(SBT_CACHE_DIR, options);
+      return await createJarBom(SBT_CACHE_DIR, options);
     case "maven-index":
     case "maven-cache":
     case "maven-repo":
       options.useMavenCache = true;
-      return createJarBom(
+      return await createJarBom(
         process.env.MAVEN_CACHE_DIR || join(homedir(), ".m2", "repository"),
         options
       );
diff --git a/utils.js b/utils.js
index 6f48f31f5c..e71088cf4b 100644
--- a/utils.js
+++ b/utils.js
@@ -2390,7 +2390,7 @@ export const getPyMetadata = async function (pkgList, fetchDepsInfo) {
   if (!FETCH_LICENSE && !fetchDepsInfo) {
     return pkgList;
   }
-  const PYPI_URL = "https://pypi.org/pypi/";
+  const PYPI_URL = process.env.PYPI_URL || "https://pypi.org/pypi/";
   const cdepList = [];
   for (const p of pkgList) {
     if (!p || !p.name) {
@@ -5075,7 +5075,6 @@ export const parsePaketLockData = async function (paketLockData) {
     dependenciesList
   };
 };
-
 /**
  * Parse composer lock file
  *
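
The `extractJarArchive` change in the next hunk alters how purls are formed for jars that carry no group id. A hand-worked sketch with a hypothetical jar:

```js
// Before: an empty group stayed empty      => pkg:maven/jackson-databind@2.9.6
// After: the name doubles as the group     => pkg:maven/jackson-databind/jackson-databind@2.9.6
const name = "jackson-databind";
let group = ""; // nothing recoverable from the manifest or pom.properties
group = group === "." ? name : group || name; // "jackson-databind"
```
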
file
 *
@@ -6359,11 +6358,8 @@ export const extractJarArchive = function (
     }
   }
   if (name && version) {
-    // If group and name are the same we only need the name
-    if (group == name) {
-      group = "";
-    }
-    group = group === "." ? "" : encodeForPurl(group || "") || "";
+    // If the group is empty, use the name as the group
+    group = encodeForPurl(group === "." ? name : group || name) || "";
     let apkg = {
       group,
       name: name ? encodeForPurl(name) : "",
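
The `setLatestVersion` helper introduced in the next hunk is easiest to follow with worked inputs. The expected outputs below are traced by hand from its branches (the helper is local to `queryNuget`; calls are shown as if it were directly callable):

```js
setLatestVersion("2.1.4");         // -> "2.1.4"         (three parts, valid release: returned as-is)
setLatestVersion("2.1.4-beta1");   // -> "2.1.3"         (semver prerelease: patch stepped down)
setLatestVersion("1.2.3.0-alpha"); // -> "1.2.3.0-alpha" (four parts ending in 0: returned unchanged)
setLatestVersion("1.2.3.4-alpha"); // -> "1.2.3.3"       (four parts with prerelease: last part decremented)
setLatestVersion("1.2.3.4");       // -> "1.2.3.4"       (four parts, no prerelease: returned as-is)
```
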
@@ -7804,6 +7800,44 @@ async function getNugetUrl() {
 }
 
 async function queryNuget(p, NUGET_URL) {
+  function setLatestVersion(upper) {
+    // Handle special case for versions with more than 3 parts
+    if (upper.split(".").length > 3) {
+      let tmpVersionArray = upper.split("-")[0].split(".");
+      // Compromise for versions such as 1.2.3.0-alpha
+      // How to find latest proper release version?
+      if (
+        upper.split("-").length > 1 &&
+        Number(tmpVersionArray.slice(-1)) === 0
+      ) {
+        return upper;
+      } else if (upper.split("-").length > 1) {
+        tmpVersionArray[tmpVersionArray.length - 1] = (
+          Number(tmpVersionArray.slice(-1)) - 1
+        ).toString();
+      }
+      return tmpVersionArray.join(".");
+    } else {
+      const tmpVersion = parse(upper);
+      let version =
+        tmpVersion.major + "." + tmpVersion.minor + "." + tmpVersion.patch;
+      if (compare(version, upper) === 1) {
+        if (tmpVersion.patch > 0) {
+          version =
+            tmpVersion.major +
+            "." +
+            tmpVersion.minor +
+            "." +
+            (tmpVersion.patch - 1).toString();
+        }
+      }
+      return version;
+    }
+  }
+  // Coerce only when missing patch/minor version
+  function coerceUp(version) {
+    return version.split(".").length < 3 ? coerce(version).version : version;
+  }
   if (DEBUG_MODE) {
     console.log(`Querying nuget for ${p.name}`);
   }
@@ -7820,62 +7854,48 @@ async function queryNuget(p, NUGET_URL) {
   }
   if (items[0] && !items[0].items) {
     if (!p.version || p.version === "0.0.0" || p.version === "latest") {
-      const tmpVersion = parse(res.body.items[res.body.items.length - 1].upper);
-      np.version =
-        tmpVersion.major + "." + tmpVersion.minor + "." + tmpVersion.patch;
-      if (
-        compare(np.version, res.body.items[res.body.items.length - 1].upper) ===
-        1
-      ) {
-        if (tmpVersion.patch > 0) {
-          np.version =
-            tmpVersion.major +
-            "." +
-            tmpVersion.minor +
-            "." +
-            (tmpVersion.patch - 1).toString();
-        }
-      }
+      let upper = items[items.length - 1].upper;
+      np.version = setLatestVersion(upper);
     }
     for (const item of items) {
-      // if (!p.version || p.version === "0.0.0" || p.version === "latest") {
-      //   const tmpVersion = parse(res.body.items[res.body.items.length - 1].upper);
-      //   np.version = tmpVersion.major + "." + tmpVersion.minor + "." + tmpVersion.patch;
-      //
-      // }
-      if (np.version && valid(np.version)) {
-        let lower = compare(item.lower, np.version);
-        let upper = compare(item.upper, np.version);
+      if (np.version) {
+        let lower = compare(coerce(item.lower), coerce(np.version));
+        let upper = compare(coerce(item.upper), coerce(np.version));
         if (lower !== 1 && upper !== -1) {
           res = await cdxgenAgent.get(item["@id"], { responseType: "json" });
-          newBody.push(
-            res.body.items
-              .reverse()
-              .filter(
-                (i) => i.catalogEntry && i.catalogEntry.version === np.version
-              )
-          );
-          break;
+          for (const i of res.body.items.reverse()) {
+            if (
+              i.catalogEntry &&
+              i.catalogEntry.version === coerceUp(np.version)
+            ) {
+              newBody.push(i);
+              return [np, newBody];
+            }
+          }
         }
       }
     }
   } else {
     if (!p.version || p.version === "0.0.0" || p.version === "latest") {
-      const tmpVersion = parse(res.body.items[res.body.items.length - 1].upper);
-      np.version =
-        tmpVersion.major + "." + tmpVersion.minor + "." + tmpVersion.patch;
+      let upper = items[items.length - 1].upper;
+      np.version = setLatestVersion(upper);
     }
-    const firstItem = items[0];
-    // Work backwards to find the body for the matching version
-    // body.push(firstItem.items[firstItem.items.length - 1])
     if (np.version) {
-      newBody.push(
-        firstItem.items
-          .reverse()
-          .filter(
-            (i) => i.catalogEntry && i.catalogEntry.version === np.version
-          )
-      );
+      for (const item of items) {
+        let lower = compare(coerce(item.lower), coerce(np.version));
+        let upper = compare(coerce(item.upper), coerce(np.version));
+        if (lower !== 1 && upper !== -1) {
+          for (const i of item.items.reverse()) {
+            if (
+              i.catalogEntry &&
+              i.catalogEntry.version === coerceUp(np.version)
+            ) {
+              newBody.push(i);
+              return [np, newBody];
+            }
+          }
+        }
+      }
     }
   }
   return [np, newBody];
@@ -7924,8 +7944,8 @@ export const getNugetMetadata = async function (
       depRepList[oldRef] = p["bom-ref"];
       p.version = np.version;
     }
-    if (newBody && newBody[0].length > 0) {
-      body = newBody[0][0];
+    if (newBody && newBody.length > 0) {
+      body = newBody[0];
     }
     if (body) {
       metadata_cache[cacheKey] = body;