From 1bfb701af3c4794948f780d590af072c7eaba45c Mon Sep 17 00:00:00 2001 From: Bernhard Schleicher Date: Tue, 28 Nov 2023 10:02:26 +0100 Subject: [PATCH] Improved fetch license for nuget (#739) * Improve fetch license for nuget Signed-off-by: Nikemare * Fixing issues checking license status Signed-off-by: Nikemare * Fixed type check Signed-off-by: Nikemare * Providing https instead of http Signed-off-by: Nikemare * Code formatting Signed-off-by: Nikemare * Added fetch_license test for dotnet-paket Signed-off-by: Nikemare * Refactored & improved nuget license resolving Signed-off-by: Nikemare * Fixed bug in getRepoLicense Signed-off-by: Nikemare * Add support for mono and apache.org Signed-off-by: Nikemare * Refactored known-licenses & rate limit docu Signed-off-by: Nikemare * Improve fetch license for nuget Signed-off-by: Nikemare * Fixing issues checking license status Signed-off-by: Nikemare * Fixed type check Signed-off-by: Nikemare * Providing https instead of http Signed-off-by: Nikemare * Code formatting Signed-off-by: Nikemare * Added fetch_license test for dotnet-paket Signed-off-by: Nikemare * Refactored & improved nuget license resolving Signed-off-by: Nikemare * Fixed bug in getRepoLicense Signed-off-by: Nikemare * Add support for mono and apache.org Signed-off-by: Nikemare * Refactored known-licenses & rate limit docu Signed-off-by: Nikemare * Small fixes for known-licenses Signed-off-by: Nikemare --------- Signed-off-by: Nikemare --- .github/workflows/repotests.yml | 1 + README.md | 2 +- data/known-licenses.json | 105 +++++++++++++++++------ utils.js | 142 +++++++++++++++++++++++++------- 4 files changed, 191 insertions(+), 59 deletions(-) diff --git a/.github/workflows/repotests.yml b/.github/workflows/repotests.yml index ca0d77faf..f93355817 100644 --- a/.github/workflows/repotests.yml +++ b/.github/workflows/repotests.yml @@ -295,6 +295,7 @@ jobs: - name: repotests dotnet-paket run: | bin/cdxgen.js -p -r -t dotnet repotests/dotnet-paket -o 
bomresults/bom-dotnet-paket.json --validate + FETCH_LICENSE=true bin/cdxgen.js -p -r -t dotnet repotests/dotnet-paket -o bomresults/bom-dotnet-paket-2.json --validate shell: bash - name: repotests blint run: | diff --git a/README.md b/README.md index 6def6b290..21a09c383 100644 --- a/README.md +++ b/README.md @@ -327,7 +327,7 @@ This would create a bom.json.map file with the jar - class name mapping. Refer t ## Resolving licenses -cdxgen can automatically query public registries such as maven, npm, or nuget to resolve the package licenses. This is a time-consuming operation and is disabled by default. To enable, set the environment variable `FETCH_LICENSE` to `true`, as shown. +cdxgen can automatically query public registries such as maven, npm, or nuget to resolve the package licenses. This is a time-consuming operation and is disabled by default. To enable, set the environment variable `FETCH_LICENSE` to `true`, as shown. Ensure that `GITHUB_TOKEN` is set or provided by [built-in GITHUB_TOKEN in GitHub Actions](https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api#primary-rate-limit-for-github_token-in-github-actions), otherwise rate limiting might prevent license resolving. 
```bash export FETCH_LICENSE=true diff --git a/data/known-licenses.json b/data/known-licenses.json index 621bd4942..2e96e0734 100644 --- a/data/known-licenses.json +++ b/data/known-licenses.json @@ -1,31 +1,82 @@ [ - { "license": "Apache-2.0", "group": "cloud.google.com", "name": "go" }, - { "license": "Apache-2.0", "group": "cloud.google.com/go", "name": "*" }, - { "license": "Apache-2.0", "group": "cuelang.org", "name": "go" }, - { "license": "MIT", "group": "pack.ag", "name": "amqp" }, - { "license": "Apache-2.0", "group": "google.golang.org", "name": "*" }, - { "license": "BSD-3-Clause", "group": "golang.org/x", "name": "*" }, { - "license": "BSD-3-Clause", - "group": "dmitri.shuralyov.com/gpu", - "name": "*" + "packageNamespace": "*", + "knownLicenses": [{ "license": "MIT", "urlIncludes": "mit-license" }] }, - { "license": "Apache-2.0", "group": "contrib.go.opencensus.io", "name": "*" }, - { "license": "Apache-2.0", "group": "git.apache.org", "name": "*" }, - { "license": "Apache-2.0", "group": ".", "name": "go.opencensus.io" }, - { "license": "MIT", "group": "sigs.k8s.io", "name": "*" }, - { "license": "BSD-3-Clause", "group": "rsc.io", "name": "*" }, - { "license": "Apache-2.0", "group": "openpitrix.io", "name": "*" }, - { "license": "BSD-3-Clause", "group": "modernc.org", "name": "*" }, - { "license": "Apache-2.0", "group": "kubesphere.io", "name": "*" }, - { "license": "Apache-2.0", "group": "k8s.io", "name": "*" }, - { "license": "Apache-2.0", "group": "istio.io", "name": "*" }, - { "license": "MIT", "group": "honnef.co/go", "name": "*" }, - { "license": "Apache-2.0", "group": ".", "name": "gotest.tools" }, - { "license": "Apache-2.0", "group": "gopkg.in", "name": "*" }, - { "license": "Apache-2.0", "group": "code.cloudfoundry.org", "name": "*" }, - { "license": "BSD-3-Clause", "group": "gonum.org/v1", "name": "*" }, - { "license": "Apache-2.0", "group": "gomodules.xyz/jsonpatch", "name": "*" }, - { "license": "MIT", "group": "go.uber.org", "name": "*" }, 
- { "license": "MIT", "group": "go.etcd.io", "name": "*" } + { + "packageNamespace": "pkg:golang/", + "knownLicenses": [ + { "license": "Apache-2.0", "group": "cloud.google.com", "name": "go" }, + { "license": "Apache-2.0", "group": "cloud.google.com/go", "name": "*" }, + { "license": "Apache-2.0", "group": "cuelang.org", "name": "go" }, + { "license": "MIT", "group": "pack.ag", "name": "amqp" }, + { "license": "Apache-2.0", "group": "google.golang.org", "name": "*" }, + { "license": "BSD-3-Clause", "group": "golang.org/x", "name": "*" }, + { + "license": "BSD-3-Clause", + "group": "dmitri.shuralyov.com/gpu", + "name": "*" + }, + { + "license": "Apache-2.0", + "group": "contrib.go.opencensus.io", + "name": "*" + }, + { "license": "Apache-2.0", "group": "git.apache.org", "name": "*" }, + { "license": "Apache-2.0", "group": ".", "name": "go.opencensus.io" }, + { "license": "MIT", "group": "sigs.k8s.io", "name": "*" }, + { "license": "BSD-3-Clause", "group": "rsc.io", "name": "*" }, + { "license": "Apache-2.0", "group": "openpitrix.io", "name": "*" }, + { "license": "BSD-3-Clause", "group": "modernc.org", "name": "*" }, + { "license": "Apache-2.0", "group": "kubesphere.io", "name": "*" }, + { "license": "Apache-2.0", "group": "k8s.io", "name": "*" }, + { "license": "Apache-2.0", "group": "istio.io", "name": "*" }, + { "license": "MIT", "group": "honnef.co/go", "name": "*" }, + { "license": "Apache-2.0", "group": ".", "name": "gotest.tools" }, + { "license": "Apache-2.0", "group": "gopkg.in", "name": "*" }, + { + "license": "Apache-2.0", + "group": "code.cloudfoundry.org", + "name": "*" + }, + { "license": "BSD-3-Clause", "group": "gonum.org/v1", "name": "*" }, + { + "license": "Apache-2.0", + "group": "gomodules.xyz/jsonpatch", + "name": "*" + }, + { "license": "MIT", "group": "go.uber.org", "name": "*" }, + { "license": "MIT", "group": "go.etcd.io", "name": "*" } + ] + }, + { + "packageNamespace": "pkg:nuget/", + "knownLicenses": [ + { + "license": "MIT", + 
"urlIncludes": "//github.com/dotnet/standard/", + "licenseEvidence": "https://github.com/dotnet/standard/blob/release/3.0/LICENSE.TXT" + }, + { + "license": "MIT", + "urlIncludes": "//github.com/dotnet/corefx/", + "licenseEvidence": "https://github.com/dotnet/corefx/blob/release/2.0.0/LICENSE.TXT" + }, + { + "license": "MIT", + "urlIncludes": "//github.com/dotnet/core-setup/", + "licenseEvidence": "https://github.com/dotnet/core-setup/blob/release/2.0.0/LICENSE.TXT" + }, + { + "licenseName": ".NET Library License", + "urlEndswith": "?LinkId=329770", + "licenseEvidence": "https://go.microsoft.com/fwlink/?LinkId=329770" + }, + { + "licenseName": ".NET Library License", + "urlEndswith": "dotnet_library_license.htm", + "licenseEvidence": "https://dotnet.microsoft.com/en-us/dotnet_library_license.htm" + } + ] + } ] diff --git a/utils.js b/utils.js index 46b181151..ed4256dc7 100644 --- a/utils.js +++ b/utils.js @@ -217,20 +217,10 @@ export function getLicenses(pkg, format = "xml") { licenseContent.id = l; licenseContent.url = "https://opensource.org/licenses/" + l; } else if (l.startsWith("http")) { - if (!l.includes("opensource.org")) { - licenseContent.name = "CUSTOM"; - } else { - const possibleId = l - .replace("http://www.opensource.org/licenses/", "") - .toUpperCase(); - spdxLicenses.forEach((v) => { - if (v.toUpperCase() === possibleId) { - licenseContent.id = v; - } - }); - } - if (l.includes("mit-license")) { - licenseContent.id = "MIT"; + let knownLicense = getKnownLicense(l, pkg); + if (knownLicense) { + licenseContent.id = knownLicense.id; + licenseContent.name = knownLicense.name; } // We always need a name to avoid validation errors // Issue: #469 @@ -252,10 +242,82 @@ export function getLicenses(pkg, format = "xml") { return licenseContent; }) .map((l) => ({ license: l })); + } else { + let knownLicense = getKnownLicense(undefined, pkg); + if (knownLicense) { + return [{ license: knownLicense }]; + } } return undefined; } +/** + * Method to retrieve known 
license by known-licenses.json + * + * @param {String} licenseUrl License url + * @param {Object} pkg Package object (purl, group, name) + * @return {Object} Object with SPDX license id or license name, or undefined when nothing matches + */ +export const getKnownLicense = function (licenseUrl, pkg) { + if (licenseUrl && licenseUrl.includes("opensource.org")) { + const possibleId = licenseUrl + .toLowerCase() + .replace("https://", "http://") + .replace("http://www.opensource.org/licenses/", ""); + for (const spdxLicense of spdxLicenses) { + if (spdxLicense.toLowerCase() === possibleId) { + return { id: spdxLicense }; + } + } + } else if (licenseUrl && licenseUrl.includes("apache.org")) { + const possibleId = licenseUrl + .toLowerCase() + .replace("https://", "http://") + .replace("http://www.apache.org/licenses/license-", "apache-") + .replace(".txt", ""); + for (const spdxLicense of spdxLicenses) { + if (spdxLicense.toLowerCase() === possibleId) { + return { id: spdxLicense }; + } + } + } + for (const akLicGroup of knownLicenses) { + if ( + akLicGroup.packageNamespace === "*" || + (pkg.purl && pkg.purl.startsWith(akLicGroup.packageNamespace)) + ) { + for (const akLic of akLicGroup.knownLicenses) { + if (akLic.group && akLic.name) { + if (akLic.group === "." && akLic.name === pkg.name) { + return { id: akLic.license, name: akLic.licenseName }; + } else if ( + pkg.group && + pkg.group.includes(akLic.group) && + (akLic.name === pkg.name || akLic.name === "*") + ) { + return { id: akLic.license, name: akLic.licenseName }; + } + } + if ( + akLic.urlIncludes && + licenseUrl && + licenseUrl.includes(akLic.urlIncludes) + ) { + return { id: akLic.license, name: akLic.licenseName }; + } + if ( + akLic.urlEndswith && + licenseUrl && + licenseUrl.endsWith(akLic.urlEndswith) + ) { + return { id: akLic.license, name: akLic.licenseName }; + } + } + } + } + return undefined; +}; + /** * Tries to find a file containing the license text based on commonly * used naming and content types. 
If a candidate file is found, add @@ -2430,7 +2492,7 @@ export const fetchPomXmlAsJson = async function ({ * @param {String} name * @param {String} version * - * @return {String} + * @return {Promise} */ export const fetchPomXml = async function ({ urlPrefix, @@ -2467,7 +2529,7 @@ export const parseLicenseEntryOrArrayFromPomXml = function (license) { * @param {String} name * @param {String} version * - * @return {String} License ID + * @return {Promise} License ID */ export const extractLicenseCommentFromPomXml = async function ({ urlPrefix, @@ -3287,7 +3349,7 @@ export const toGitHubApiUrl = function (repoUrl, repoMetadata) { * * @param {String} repoUrl Repository url * @param {Object} repoMetadata Object containing group and package name strings - * @return {String} SPDX license id + * @return {Promise} SPDX license id */ export const getRepoLicense = async function (repoUrl, repoMetadata) { let apiUrl = toGitHubApiUrl(repoUrl, repoMetadata); @@ -3323,23 +3385,23 @@ export const getRepoLicense = async function (repoUrl, repoMetadata) { } } licObj["id"] = licenseId; - return licObj; + if (licObj["id"] || licObj["name"]) { + return licObj; + } } } catch (err) { - return undefined; - } - } else if (repoMetadata) { - const group = repoMetadata.group; - const name = repoMetadata.name; - if (group && name) { - for (const akLic of knownLicenses) { - if (akLic.group === "." && akLic.name === name) { - return akLic.license; - } else if ( - group.includes(akLic.group) && - (akLic.name === name || akLic.name === "*") + if (err && err.message) { + if ( + err.message.includes("rate limit exceeded") && + !process.env.GITHUB_TOKEN ) { - return akLic.license; + console.log( + "Rate limit exceeded for REST API of github.com. " + + "Please ensure GITHUB_TOKEN is set as environment variable. 
" + + "See: https://docs.github.com/en/rest/overview/rate-limits-for-the-rest-api" + ); + } else if (!err.message.includes("404")) { + console.log(err); } } } @@ -8465,6 +8527,13 @@ export const getNugetMetadata = async function ( p.license = findLicenseId(body.catalogEntry.licenseExpression); } else if (body.catalogEntry.licenseUrl) { p.license = findLicenseId(body.catalogEntry.licenseUrl); + if ( + typeof p.license === "string" && + p.license.includes("://github.com/") + ) { + p.license = + (await getRepoLicense(p.license, undefined)) || p.license; + } } if (body.catalogEntry.projectUrl) { p.repository = { url: body.catalogEntry.projectUrl }; @@ -8476,6 +8545,17 @@ export const getNugetMetadata = async function ( p.version + "/" }; + if ( + (!p.license || typeof p.license === "string") && + typeof p.repository.url === "string" && + p.repository.url.includes("://github.com/") + ) { + // license couldn't be properly identified and is still a url, + // therefore trying to resolve license via repository + p.license = + (await getRepoLicense(p.repository.url, undefined)) || + p.license; + } } cdepList.push(p); }