From 010db37b50a28b65b798448a2c7612926f8d40b3 Mon Sep 17 00:00:00 2001 From: Prabhu Subramanian Date: Wed, 13 Nov 2024 12:40:50 +0000 Subject: [PATCH] obom tagging Signed-off-by: Prabhu Subramanian --- ci/Dockerfile | 2 +- ci/base-images/cdxgen/Dockerfile.python | 2 +- ci/base-images/sle/Dockerfile.lang | 2 +- data/component-tags.json | 356 +++++++++++--------- lib/stages/postgen/annotator.js | 91 ++++- lib/stages/postgen/annotator.test.js | 9 +- lib/stages/postgen/postgen.js | 5 +- types/lib/stages/postgen/annotator.d.ts | 13 +- types/lib/stages/postgen/annotator.d.ts.map | 2 +- types/lib/stages/postgen/postgen.d.ts.map | 2 +- 10 files changed, 303 insertions(+), 181 deletions(-) diff --git a/ci/Dockerfile b/ci/Dockerfile index d646709cd..2e7b6658d 100644 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -21,7 +21,7 @@ ARG SBT_VERSION=1.10.2 ARG MAVEN_VERSION=3.9.9 ARG GRADLE_VERSION=8.10 ARG GO_VERSION=1.23.1 -ARG NODE_VERSION=23.1.0 +ARG NODE_VERSION=23.2.0 ARG PYTHON_VERSION=3.12 ENV GOPATH=/opt/app-root/go \ diff --git a/ci/base-images/cdxgen/Dockerfile.python b/ci/base-images/cdxgen/Dockerfile.python index cc1dcee69..c86ca9bc8 100644 --- a/ci/base-images/cdxgen/Dockerfile.python +++ b/ci/base-images/cdxgen/Dockerfile.python @@ -11,7 +11,7 @@ LABEL maintainer="CycloneDX" \ org.opencontainers.image.description="Rolling image with cdxgen SBOM generator for Python 3.12 apps" \ org.opencontainers.docker.cmd="docker run --rm -v /tmp:/tmp -p 9090:9090 -v $(pwd):/app:rw -t ghcr.io/cyclonedx/cdxgen-python:v10 -r /app --server" -ARG NODE_VERSION=23.1.0 +ARG NODE_VERSION=23.2.0 ENV NVM_DIR="/root/.nvm" \ PYTHON_CMD=python3 \ diff --git a/ci/base-images/sle/Dockerfile.lang b/ci/base-images/sle/Dockerfile.lang index 9df2442ef..1c216c90c 100644 --- a/ci/base-images/sle/Dockerfile.lang +++ b/ci/base-images/sle/Dockerfile.lang @@ -3,7 +3,7 @@ FROM registry.suse.com/bci/python:3.12 ARG JAVA_VERSION=23-tem ARG MAVEN_VERSION=3.9.9 ARG GCC_VERSION=13 -ARG NODE_VERSION=23.1.0 +ARG 
NODE_VERSION=23.2.0 ENV JAVA_VERSION=$JAVA_VERSION \ MAVEN_VERSION=$MAVEN_VERSION \ diff --git a/data/component-tags.json b/data/component-tags.json index 1d89b0e95..9572c9c22 100644 --- a/data/component-tags.json +++ b/data/component-tags.json @@ -1,157 +1,203 @@ { - "description": [ - "sql", - "http", - "xml", - "web", - "security", - "database", - "json", - "yaml", - "validation", - "sanitization", - "cloud", - "iam", - "auth", - "middleware", - "serialization", - "event", - "stream", - "rpc", - "socket", - "proto", - "resource", - "data", - "sensitive", - "template", - "log", - "logging", - "service", - "api", - "slf4j", - "parse", - "emit", - "jdbc", - "connect", - "pool", - "beans", - "transaction", - "mysql", - "postgres", - "oracle", - "mongo", - "redis", - "splunk", - "stripe", - "payment", - "finance", - "currency", - "coin", - "monero", - "ssl", - "traffic", - "mvc", - "html", - "escape", - "unescape", - "rest", - "tomcat", - "hibernate", - "orm", - "aop", - "jwt", - "saml", - "token", - "tls", - "codec", - "cron", - "crypto", - "jce", - "certificate", - "developer", - "tools", - "autoconfigure", - "test", - "jsonpath", - "bytecode", - "mock", - "injection", - "comparators", - "transform", - "encode", - "decode", - "ldap", - "owasp", - "fileupload", - "beanshell", - "spel", - "mail", - "apacheds", - "jndi", - "ldif", - "jdbm", - "kerberos", - "oidc", - "oauth2", - "cli", - "binary", - "ml", - "ai", - "azure", - "gcp", - "terraform", - "redis", - "valkey", - "lint", - "bundle", - "object-persistence", - "text-to-image", - "translat", - "object-detect", - "mvc", - "framework", - "graph", - "templates", - "fastjson", - "simd", - "event-driven", - "productivity", - "typesafe", - "projections", - "performance", - "plugins", - "non-block", - "microsoft" - ], - "properties": [ - "sql", - "http", - "xml", - "cloud", - "middleware", - "framework", - "bluetooth", - "wifi", - "wireless", - "driver", - "graphics", - "firmware", - "gyroscope", - "accelerometer", - 
"mobile", - "network", - "battery", - "matrix", - "thunderbolt", - "crypto", - "algorithm", - "encrypt", - "decrypt", - "registry", - "maps", - "payment", - "stripe", - "apple-pay", - "icloud" - ] + "description": { + "all": [ + "sql", + "http", + "xml", + "web", + "security", + "database", + "json", + "yaml", + "validation", + "sanitization", + "cloud", + "iam", + "auth", + "middleware", + "serialization", + "event", + "stream", + "rpc", + "socket", + "proto", + "resource", + "data", + "sensitive", + "template", + "log", + "logging", + "service", + "api", + "slf4j", + "parse", + "emit", + "jdbc", + "connect", + "pool", + "beans", + "transaction", + "mysql", + "postgres", + "oracle", + "mongo", + "redis", + "splunk", + "stripe", + "payment", + "finance", + "currency", + "coin", + "monero", + "ssl", + "traffic", + "mvc", + "html", + "escape", + "unescape", + "rest", + "tomcat", + "hibernate", + "orm", + "aop", + "jwt", + "saml", + "token", + "tls", + "codec", + "cron", + "crypto", + "jce", + "certificate", + "developer", + "tools", + "autoconfigure", + "test", + "jsonpath", + "bytecode", + "mock", + "injection", + "comparators", + "transform", + "encode", + "decode", + "ldap", + "owasp", + "fileupload", + "beanshell", + "spel", + "mail", + "apacheds", + "jndi", + "ldif", + "jdbm", + "kerberos", + "oidc", + "oauth2", + "cli", + "binary", + "ml", + "ai", + "azure", + "gcp", + "terraform", + "redis", + "valkey", + "lint", + "bundle", + "object-persistence", + "text-to-image", + "translat", + "object-detect", + "mvc", + "framework", + "graph", + "templates", + "fastjson", + "simd", + "event-driven", + "productivity", + "typesafe", + "projections", + "performance", + "plugins", + "non-block", + "microsoft" + ] + }, + "properties": { + "all": [ + "sql", + "http", + "xml", + "cloud", + "middleware", + "framework", + "bluetooth", + "wifi", + "wireless", + "driver", + "graphics", + "firmware", + "gyroscope", + "accelerometer", + "mobile", + "network", + "battery", + 
"matrix", + "thunderbolt", + "crypto", + "algorithm", + "encrypt", + "decrypt", + "registry", + "maps", + "payment", + "stripe", + "apple-pay", + "icloud" + ] + }, + "name": { + "obom": [ + { "devel": ["-(dev|devel|headers|sdk|libs)$", "^(git)-"] }, + { "bin": ["(-bin)$"] }, + { "kernel": ["^(linux|kernel)"] }, + { + "security": [ + "(selinux|apparmor|security|openssl|libressl|gnutls|jose|keyutils|passwd)" + ] + }, + { + "container": [ + "(container|podman|docker|runc|nerdctl|crun|libvirt|qemu)" + ] + }, + { + "build": ["(cpp|fortran|gcc|make|meson|bazel|maven|gradle|sbt|ant|gdb)"] + }, + { + "network": [ + "(tailscale|wireguard|openvpn|dns|cockpit|cups|dhcp|network|iproute|iptables|mosh|netavark|openssh|rsync|tcpdump)" + ] + }, + { "webserver": ["(httpd|http2)"] }, + { "crypto": ["(crypt|gpg|keys|certificates|gnupg)"] }, + { "repository": ["(-repos|-release|ostree)"] }, + { "shell": ["(bash|zsh|csh|fish)"] }, + { "bluetooth": ["(bluez|bluetooth)"] }, + { "sound": ["(alsa|pulseaudio|wireplumber|flac|codecs|ldac|sound)"] }, + { "compression": ["(tar|zip|brotli|xz-utils|webp|zstd|lz4)"] }, + { + "runtime": [ + "(perl|lua|php|python|ruby|dotnet|java|swift|runtime|glibc|musl|wasm)" + ] + }, + { "editor": ["(vim|emacs|nano|hexedit)"] }, + { "xml": ["(xml|expat)"] }, + { "boot": ["(grub|systemd-boot|syslinux)"] }, + { "gui": ["(wayland|xorg|X11|mesa|vulkan|tk|wkhtmltox)"] }, + { "package": ["(rpm|dnf|yum|apt|zypper|apk)"] } + ] + } } diff --git a/lib/stages/postgen/annotator.js b/lib/stages/postgen/annotator.js index 06735dae6..f26bb7b7d 100644 --- a/lib/stages/postgen/annotator.js +++ b/lib/stages/postgen/annotator.js @@ -2,6 +2,7 @@ import { readFileSync } from "node:fs"; import { join } from "node:path"; import { dirNameStr } from "../../helpers/utils.js"; +// Tags per BOM type. 
const componentTags = JSON.parse( readFileSync(join(dirNameStr, "data", "component-tags.json"), "utf-8"), ); @@ -40,7 +41,44 @@ function cleanTypes(s) { } /** - * Create the textual representation of the metadata section + * Method to determine the type of the BOM. + * + * @param {Object} bomJson BOM JSON Object + * + * @returns {Object} Object with bomType (SBOM, OBOM, CBOM or ML-BOM) and bomTypeDescription (the human-readable name of that type) + */ +export function findBomType(bomJson) { + let description = "Software Bill-of-Materials (SBOM)"; + let bomType = "SBOM"; + const metadata = bomJson.metadata; + const lifecycles = metadata?.lifecycles || []; + const cryptoAssetsCount = bomJson?.components?.filter( + (c) => c.type === "cryptographic-asset", + ).length; + const dataCount = bomJson?.components?.filter( + (c) => + c?.data?.length > 0 || + (c.modelCard && Object.keys(c?.modelCard).length > 0), + ).length; + // Is this an OBOM? + if (lifecycles.filter((l) => l.phase === "operations").length > 0) { + bomType = "OBOM"; + description = "Operations Bill-of-Materials (OBOM)"; + } else if (cryptoAssetsCount > 0) { + bomType = "CBOM"; + description = "Cryptography Bill-of-Materials (CBOM)"; + } else if (dataCount > 0) { + bomType = "ML-BOM"; + description = "Machine-Learning Bill-of-Materials (ML-BOM)"; + } + return { + bomType, + bomTypeDescription: description, + }; +} + +/** + * Create the textual representation of the metadata section. * * @param {Object} bomJson BOM JSON Object * @@ -51,20 +89,14 @@ export function textualMetadata(bomJson) { return undefined; } let text = ""; - let cdxTypeDesc = "Software Bill-of-Materials (SBOM)"; + const { bomType, bomTypeDescription } = findBomType(bomJson); const metadata = bomJson.metadata; const lifecycles = metadata?.lifecycles || []; - const cryptoAssetsCount = bomJson?.components.filter( + const cryptoAssetsCount = bomJson?.components?.filter( (c) => c.type === "cryptographic-asset", ).length; - // Is this an OBOM? 
- if (lifecycles.filter((l) => l.phase === "operations").length > 0) { - cdxTypeDesc = "Operations Bill-of-Materials (OBOM)"; - } else if (cryptoAssetsCount > 0) { - cdxTypeDesc = "Cryptography Bill-of-Materials (CBOM)"; - } if (metadata?.timestamp) { - text = `This ${cdxTypeDesc} document was created on ${humanifyTimestamp(metadata.timestamp)}`; + text = `This ${bomTypeDescription} document was created on ${humanifyTimestamp(metadata.timestamp)}`; } if (metadata?.tools) { const tools = metadata.tools.components; @@ -101,7 +133,7 @@ export function textualMetadata(bomJson) { text = `${text} The document describes ${toArticle(metadata.component.type)} ${cleanTypeName} named '${cleanNames(metadata.component.name)}'.`; } if (cryptoAssetsCount) { - text = `${text} There are ${cryptoAssetsCount} cryptographic assets listed under components in this CBOM.`; + text = `${text} There are ${cryptoAssetsCount} cryptographic assets listed under components in this ${bomType}.`; } if ( metadata?.component.components && @@ -165,7 +197,7 @@ export function textualMetadata(bomJson) { } if (bomPkgTypes.length && bomPkgNamespaces.length) { if (bomPkgTypes.length === 1) { - text = `${text} The package type in this xBOM is ${joinArray(bomPkgTypes)} with ${bomPkgNamespaces.length} namespaces described under components.`; + text = `${text} The package type in this ${bomType} is ${joinArray(bomPkgTypes)} with ${bomPkgNamespaces.length} namespaces described under components.`; } else { text = `${text} ${bomPkgTypes.length} package type(s) and ${bomPkgNamespaces.length} namespaces are described in the document under components.`; } @@ -177,18 +209,45 @@ export function textualMetadata(bomJson) { * Extract interesting tags from the component attribute * * @param {Object} component CycloneDX component + * @param {String} bomType BOM type * @returns {Array | undefined} Array of string tags */ -export function extractTags(component) { - if (!component || (!component.description && 
!component.properties)) { +export function extractTags(component, bomType = "all") { + if ( + !component || + (!component.description && !component.properties && !component.name) + ) { return undefined; } const tags = new Set(); const desc = component?.description?.toLowerCase(); const compProps = component.properties || []; + // Collect both the BOM specific tags and all tags + const compNameTags = (componentTags.name[bomType.toLowerCase()] || []).concat( + componentTags.name.all || [], + ); + const compDescTags = ( + componentTags.description[bomType.toLowerCase()] || [] + ).concat(componentTags.description.all || []); + const compPropsTags = ( + componentTags.properties[bomType.toLowerCase()] || [] + ).concat(componentTags.properties.all || []); + if (component?.name) { + // Each entry maps a tag to regex source strings (no surrounding slashes), e.g. {"devel": ["-(dev|devel|headers|sdk|libs)$", "^(git)-"]} + for (const anameTagObject of compNameTags) { + for (const compCategoryTag of Object.keys(anameTagObject)) { + for (const catRegexStr of anameTagObject[compCategoryTag]) { + // Regex-based search on the name + if (new RegExp(catRegexStr, "ig").test(component.name)) { + tags.add(compCategoryTag); + } + } + } + } + } // Identify tags from description if (desc) { - for (const adescTag of componentTags.description) { + for (const adescTag of compDescTags) { if (desc.includes(` ${adescTag} `) || desc.includes(` ${adescTag}.`)) { tags.add(adescTag); } @@ -204,7 +263,7 @@ export function extractTags(component) { } // Identify tags from properties as a fallback if (!tags.size) { - for (const adescTag of componentTags.properties) { + for (const adescTag of compPropsTags) { for (const aprop of compProps) { if ( aprop.name !== "SrcFile" && diff --git a/lib/stages/postgen/annotator.test.js b/lib/stages/postgen/annotator.test.js index 1a8921e08..61daa2821 100644 --- a/lib/stages/postgen/annotator.test.js +++ b/lib/stages/postgen/annotator.test.js @@ -1,4 +1,4 @@ 
-import { textualMetadata } from "./annotator.js"; +import { extractTags, textualMetadata } from 
"./annotator.js"; import { expect, test } from "@jest/globals"; @@ -267,3 +267,10 @@ test("textualMetadata tests", () => { "This Operations Bill-of-Materials (OBOM) document was created on Monday, November 11, 2024 with cdxgen. The lifecycles phases represented are: pre-build and operations. The document describes an operating system named 'Microsoft Windows 11 Pro' with version '22H2'. The OS is x64 architecture with the build version '10.0.22621'.", ); }); + +test("extractTags tests", () => { + expect(extractTags({ name: "container-selinux" }, "obom")).toEqual([ + "container", + "security", + ]); +}); diff --git a/lib/stages/postgen/postgen.js b/lib/stages/postgen/postgen.js index 8224114ec..438f20c95 100644 --- a/lib/stages/postgen/postgen.js +++ b/lib/stages/postgen/postgen.js @@ -9,7 +9,7 @@ import { getTimestamp, hasAnyProjectType, } from "../../helpers/utils.js"; -import { extractTags, textualMetadata } from "./annotator.js"; +import { extractTags, findBomType, textualMetadata } from "./annotator.js"; /** * Filter and enhance BOM post generation. @@ -344,9 +344,10 @@ export function annotate(bomJson, options) { bomJson.metadata.component["bom-ref"] = undefined; } } + const { bomType, bomTypeDescription } = findBomType(bomJson); // Tag the components for (const comp of bomJson.components) { - const tags = extractTags(comp); + const tags = extractTags(comp, bomType); if (tags?.length) { comp.tags = tags; } diff --git a/types/lib/stages/postgen/annotator.d.ts b/types/lib/stages/postgen/annotator.d.ts index f26c8cbad..0d5f72ac9 100644 --- a/types/lib/stages/postgen/annotator.d.ts +++ b/types/lib/stages/postgen/annotator.d.ts @@ -1,5 +1,13 @@ /** - * Create the textual representation of the metadata section + * Method to determine the type of the BOM. 
+ * + * @param {Object} bomJson BOM JSON Object + * + * @returns {String} Type of the bom such as sbom, cbom, obom, ml-bom etc + */ +export function findBomType(bomJson: any): string; +/** + * Create the textual representation of the metadata section. * * @param {Object} bomJson BOM JSON Object * @@ -10,7 +18,8 @@ export function textualMetadata(bomJson: any): string | undefined; * Extract interesting tags from the component attribute * * @param {Object} component CycloneDX component + * @param {String} bomType BOM type * @returns {Array | undefined} Array of string tags */ -export function extractTags(component: any): any[] | undefined; +export function extractTags(component: any, bomType?: string): any[] | undefined; //# sourceMappingURL=annotator.d.ts.map \ No newline at end of file diff --git a/types/lib/stages/postgen/annotator.d.ts.map b/types/lib/stages/postgen/annotator.d.ts.map index 618d912d9..2c31e714b 100644 --- a/types/lib/stages/postgen/annotator.d.ts.map +++ b/types/lib/stages/postgen/annotator.d.ts.map @@ -1 +1 @@ -{"version":3,"file":"annotator.d.ts","sourceRoot":"","sources":["../../../../lib/stages/postgen/annotator.js"],"names":[],"mappings":"AAyCA;;;;;;GAMG;AACH,+CAFa,SAAS,SAAS,CA+H9B;AAED;;;;;GAKG;AACH,6CAFa,QAAQ,SAAS,CAuC7B"} \ No newline at end of file +{"version":3,"file":"annotator.d.ts","sourceRoot":"","sources":["../../../../lib/stages/postgen/annotator.js"],"names":[],"mappings":"AA0CA;;;;;;GAMG;AACH,kDA4BC;AAED;;;;;;GAMG;AACH,+CAFa,SAAS,SAAS,CAyH9B;AAED;;;;;;GAMG;AACH,+DAFa,QAAQ,SAAS,CAiE7B"} \ No newline at end of file diff --git a/types/lib/stages/postgen/postgen.d.ts.map b/types/lib/stages/postgen/postgen.d.ts.map index fe595164e..455ff16a4 100644 --- a/types/lib/stages/postgen/postgen.d.ts.map +++ b/types/lib/stages/postgen/postgen.d.ts.map @@ -1 +1 @@ 
-{"version":3,"file":"postgen.d.ts","sourceRoot":"","sources":["../../../../lib/stages/postgen/postgen.js"],"names":[],"mappings":"AAaA;;;;;;;GAOG;AACH,+DAkBC;AAED;;;;;;;GAOG;AACH,gEAqCC;AAED;;;;;;;GAOG;AACH,gEA+BC;AAED;;;;;;;GAOG;AACH,2DAyIC;AAED;;GAEG;AACH,gDAIC;AAMD;;;;;;;GAOG;AACH,0DAmGC"} \ No newline at end of file +{"version":3,"file":"postgen.d.ts","sourceRoot":"","sources":["../../../../lib/stages/postgen/postgen.js"],"names":[],"mappings":"AAaA;;;;;;;GAOG;AACH,+DAkBC;AAED;;;;;;;GAOG;AACH,gEAqCC;AAED;;;;;;;GAOG;AACH,gEA+BC;AAED;;;;;;;GAOG;AACH,2DAyIC;AAED;;GAEG;AACH,gDAIC;AAMD;;;;;;;GAOG;AACH,0DAoGC"} \ No newline at end of file