Skip to content

Commit

Permalink
Fix fetching licenses for jvm packages (#682)
Browse files Browse the repository at this point in the history
* Added several entries to lic-mapping.json

Signed-off-by: Jacek Puchta <jacek.puchta@dotdata.com>

* Improved deducing license of MVN packages

Signed-off-by: Jacek Puchta <jacek.puchta@dotdata.com>

* style

Signed-off-by: Jacek Puchta <jacek.puchta@dotdata.com>

---------

Signed-off-by: Jacek Puchta <jacek.puchta@dotdata.com>
  • Loading branch information
puchta authored Nov 1, 2023
1 parent db764dc commit cd495d0
Show file tree
Hide file tree
Showing 2 changed files with 200 additions and 56 deletions.
22 changes: 21 additions & 1 deletion data/lic-mapping.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
{
"exp": "Apache-2.0",
"names": [
"Apache2",
"Apache 2",
"Apache 2.0",
"Apache Version 2.0",
"Apache 2.0 License",
"Apache Software License, Version 2.0",
"The Apache Software License, Version 2.0",
"Apache License v2.0",
"Apache License (v2.0)",
"Apache License 2.0",
"Apache License Version 2.0",
Expand All @@ -20,6 +22,9 @@
"Apache-2.0 OR MIT",
"Apache2.0",
"apache-2-0",
"APL2",
"the Apache License, ASL Version 2.0",
"Apache Publich License 2.0",
"https://opensource.org/licenses/Apache2.0",
"https://opensource.org/license/apache-2-0",
"http://www.apache.org/licenses/LICENSE-2.0.html"
Expand All @@ -44,7 +49,8 @@
"BSD-2-Clause",
"BSD 2-Clause License",
"The BSD 2-Clause License",
"The 2-Clause BSD License"
"The 2-Clause BSD License",
"The BSD License"
]
},
{
Expand All @@ -53,6 +59,7 @@
"BSD 3 Clause",
"BSD 3-Clause",
"BSD-3-Clause",
"BSD 3-clause",
"BSD 3-Clause License",
"The BSD 3-Clause License",
"BSD 3-Clause \"New\" or \"Revised\" License (BSD-3-Clause)",
Expand All @@ -78,6 +85,12 @@
"BSD (4-clause)"
]
},
{
"exp": "CC0-1.0",
"names": [
"CC0"
]
},
{
"exp": "CDDL-1.0",
"names": [
Expand Down Expand Up @@ -159,6 +172,7 @@
"LGPL v2.1",
"LGPL-2.1",
"LGPL2.1",
"LGPL, version 2.1",
"GNU Lesser General Public License",
"GNU Lesser General Public License Version 2.1",
"GNU Lesser General Public License Version 2.1, February 1999",
Expand Down Expand Up @@ -281,6 +295,8 @@
"names": [
"MPL 2.0",
"Mozilla Public License 2.0",
"Mozilla Public License version 2.0",
"Mozilla Public License, version 2.0",
"Mozilla Public License 2.0 (MPL 2.0)"
]
},
Expand All @@ -296,6 +312,10 @@
"exp": "ISC",
"names": ["ISC license", "ISC License (ISCL)"]
},
{
"exp": "ICU",
"names": ["Unicode/ICU License"]
},
{
"exp": "PSF-2.0",
"names": [
Expand Down
234 changes: 179 additions & 55 deletions utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -2295,46 +2295,24 @@ export const getMvnMetadata = async function (pkgList, jarNSMapping = {}) {
if (group.indexOf("android") !== -1) {
urlPrefix = ANDROID_MAVEN;
}
const groupPart = group.replace(/\./g, "/");
// Querying maven requires a valid group name
if (!groupPart || groupPart === "") {
if (!group || group === "") {
cdepList.push(p);
continue;
}
const fullUrl =
urlPrefix +
groupPart +
"/" +
p.name +
"/" +
p.version +
"/" +
p.name +
"-" +
p.version +
".pom";
const pomMetadata = {
urlPrefix: urlPrefix,
group: group,
name: p.name,
version: p.version
};
try {
if (DEBUG_MODE) {
console.log(`Querying ${fullUrl}`);
}
const res = await cdxgenAgent.get(fullUrl);
const bodyJson = xml2js(res.body, {
compact: true,
spaces: 4,
textKey: "_",
attributesKey: "$",
commentKey: "value"
}).project;
if (bodyJson && bodyJson.licenses && bodyJson.licenses.license) {
if (Array.isArray(bodyJson.licenses.license)) {
p.license = bodyJson.licenses.license.map((l) => {
return findLicenseId(l.name._);
});
} else if (Object.keys(bodyJson.licenses.license).length) {
const l = bodyJson.licenses.license;
p.license = [findLicenseId(l.name._)];
}
console.log(
`Querying ${pomMetadata} from ${composePomXmlUrl(pomMetadata)}`
);
}
const bodyJson = await fetchPomXmlAsJson(pomMetadata);
p.publisher =
bodyJson.organization && bodyJson.organization.name
? bodyJson.organization.name._
Expand All @@ -2343,23 +2321,151 @@ export const getMvnMetadata = async function (pkgList, jarNSMapping = {}) {
if (bodyJson.scm && bodyJson.scm.url) {
p.repository = { url: bodyJson.scm.url._ };
}
cdepList.push(p);
p.license =
parseLicenseEntryOrArrayFromPomXml(bodyJson?.licenses?.license) ||
(await extractLicenseCommentFromPomXml(pomMetadata)) ||
(await getRepoLicense(p.repository?.url, undefined));
} catch (err) {
if (DEBUG_MODE) {
console.log(
"Unable to find metadata for",
group,
p.name,
p.version,
fullUrl
`An error occurred when trying to fetch metadata ${pomMetadata}`,
err
);
}
} finally {
cdepList.push(p);
}
}
return cdepList;
};

/**
 * Build the URL of the .pom file that belongs to a Maven artifact.
 *
 * Every dot of the group id becomes a path separator, giving
 * `<urlPrefix><group/as/path>/<name>/<version>/<name>-<version>.pom`.
 * The prefix is used verbatim: no separator is inserted between it and the
 * group path, so it has to carry its own trailing slash.
 *
 * @param {Object} pomMetadata Coordinates of the artifact
 * @param {String} pomMetadata.urlPrefix Base URL of the repository
 * @param {String} pomMetadata.group Group id in dotted notation
 * @param {String} pomMetadata.name Artifact id
 * @param {String} pomMetadata.version Artifact version
 *
 * @return {String} fullUrl
 */
export const composePomXmlUrl = function ({ urlPrefix, group, name, version }) {
  const groupPath = group.split(".").join("/");
  const artifactDir = `${groupPath}/${name}/${version}`;
  const pomFileName = `${name}-${version}.pom`;
  return `${urlPrefix}${artifactDir}/${pomFileName}`;
};

/**
 * Download the pom.xml of an artifact and return its <project> element as a
 * JSON-like object.
 *
 * When the POM declares a <parent>, that parent POM is downloaded as well
 * (from the same urlPrefix) and the two <project> objects are merged
 * shallowly: a top-level element present in the child replaces the parent's
 * element of the same name. Only the direct parent is consulted.
 *
 * @param {Object} pomMetadata Coordinates of the artifact
 * @param {String} pomMetadata.urlPrefix Base URL of the repository
 * @param {String} pomMetadata.group Group id
 * @param {String} pomMetadata.name Artifact id
 * @param {String} pomMetadata.version Artifact version
 *
 * @return {Object|undefined} The (possibly merged) project object, or
 *   undefined when the document has no <project> root
 */
export const fetchPomXmlAsJson = async function ({
  urlPrefix,
  group,
  name,
  version
}) {
  const parseOptions = {
    compact: true,
    spaces: 4,
    textKey: "_",
    attributesKey: "$",
    commentKey: "value"
  };
  const childXml = await fetchPomXml({ urlPrefix, group, name, version });
  const childProject = xml2js(childXml, parseOptions).project;
  if (!childProject?.parent) {
    return childProject;
  }
  // Text nodes live under the "_" key (see textKey above).
  const { groupId, artifactId, version: parentVersion } = childProject.parent;
  const parentXml = await fetchPomXml({
    urlPrefix,
    group: groupId?._,
    name: artifactId?._,
    version: parentVersion?._
  });
  const parentProject = xml2js(parentXml, parseOptions).project;
  // Child entries are spread last so that they override inherited ones.
  return { ...parentProject, ...childProject };
};

/**
 * Download the raw pom.xml of an artifact.
 *
 * The URL is produced by composePomXmlUrl and requested through
 * cdxgenAgent; request failures are not handled here and propagate to the
 * caller.
 *
 * @param {Object} pomMetadata Coordinates of the artifact
 * @param {String} pomMetadata.urlPrefix Base URL of the repository
 * @param {String} pomMetadata.group Group id
 * @param {String} pomMetadata.name Artifact id
 * @param {String} pomMetadata.version Artifact version
 *
 * @return {String} Body of the response
 */
export const fetchPomXml = async function ({
  urlPrefix,
  group,
  name,
  version
}) {
  const pomUrl = composePomXmlUrl({ urlPrefix, group, name, version });
  const { body } = await cdxgenAgent.get(pomUrl);
  return body;
};

/**
 * Method to extract the single or multiple license entries that might appear
 * in pom.xml and resolve each of them with findLicenseId.
 *
 * A <license> element is not required to carry a <name> (it may, for
 * instance, only have a <url>). Such entries are skipped instead of raising a
 * TypeError, so that the remaining entries are still used and so that callers
 * chaining fallbacks with `||` get a falsy result when nothing is usable.
 *
 * @param {Object|Array} license Value of `project.licenses.license`: a single
 *   entry, or an array of entries when the POM declares several
 *
 * @return {Array|undefined} One findLicenseId result per entry that has name
 *   text, or undefined when there is no such entry
 */
export const parseLicenseEntryOrArrayFromPomXml = function (license) {
  if (!license) {
    return undefined;
  }
  const entries = Array.isArray(license) ? license : [license];
  const licenseIds = entries
    // Text content of an element is stored under the "_" key.
    .map((entry) => entry?.name?._)
    .filter((licenseName) => Boolean(licenseName))
    .map((licenseName) => findLicenseId(licenseName));
  // An empty array is truthy and would stop a `||` fallback chain.
  return licenseIds.length ? licenseIds : undefined;
};

/**
 * Look for an XML comment placed directly in front of the <project> element
 * of a pom.xml and treat its text as a license description.
 *
 * The pom.xml is downloaded with fetchPomXml. Only whitespace may separate
 * the end of the comment from the `<project` tag.
 *
 * @param {Object} pomMetadata Coordinates of the artifact
 * @param {String} pomMetadata.urlPrefix Base URL of the repository
 * @param {String} pomMetadata.group Group id
 * @param {String} pomMetadata.name Artifact id
 * @param {String} pomMetadata.version Artifact version
 *
 * @return {String|undefined} Result of findLicenseId for the trimmed comment
 *   text, or undefined when there is no such comment or it is empty
 */
export const extractLicenseCommentFromPomXml = async function ({
  urlPrefix,
  group,
  name,
  version
}) {
  const commentBeforeProject = /<!--([\s\S]*?)-->[\s\n]*<project/m;
  const pomXml = await fetchPomXml({ urlPrefix, group, name, version });
  const found = commentBeforeProject.exec(pomXml);
  const commentText = found === null ? undefined : found[1];
  if (!commentText) {
    return undefined;
  }
  return findLicenseId(commentText.trim());
};

/**
* Method to parse python requires_dist attribute found in pypi setup.py
*
Expand Down Expand Up @@ -3114,14 +3220,15 @@ export const parseSetupPyFile = async function (setupPyData) {
};

/**
* Method to construct a github url for the given repo
* Method to construct a GitHub API url for the given repo metadata
* @param {Object} repoMetadata Repo metadata with group and name
* @return {String|undefined} github api url (or undefined - if not enough data)
*/
export const toGitHubUrl = function (repoMetadata) {
export const repoMetadataToGitHubApiUrl = function (repoMetadata) {
if (repoMetadata) {
const group = repoMetadata.group;
const name = repoMetadata.name;
let ghUrl = "https://github.com";
let ghUrl = "https://api.github.com/repos";
if (group && group !== "." && group != "") {
ghUrl = ghUrl + "/" + group.replace("github.com/", "");
}
Expand All @@ -3132,6 +3239,32 @@ export const toGitHubUrl = function (repoMetadata) {
}
};

/**
 * Method to construct GitHub api url from repo metadata or one of multiple formats of repo URLs
 *
 * A repoUrl containing "://github.com/" is split into owner and repository
 * name, which are handed to repoMetadataToGitHubApiUrl. Any other (or
 * missing) repoUrl makes the function fall back to repoMetadata.
 *
 * A trailing slash is removed before the ".git" suffix is checked, so that
 * ".../repo.git/" resolves to "repo". URLs without both an owner and a
 * repository name (e.g. "https://github.com/owner/") are rejected.
 *
 * @param {String} repoUrl Repository url
 * @param {Object} repoMetadata Object containing group and package name strings
 * @return {String|undefined} github api url (or undefined - if not a GitHub repo)
 */
export const toGitHubApiUrl = function (repoUrl, repoMetadata) {
  if (!repoUrl || !repoUrl.includes("://github.com/")) {
    return repoMetadataToGitHubApiUrl(repoMetadata);
  }
  // String.prototype.replace returns a new string; the result must be kept.
  let normalizedUrl = repoUrl.replace(/\/$/, "");
  if (normalizedUrl.toLowerCase().endsWith(".git")) {
    normalizedUrl = normalizedUrl.slice(0, -4);
  }
  // "<scheme>://github.com/<owner>/<repo>" -> [scheme, "", host, owner, repo, ...]
  const parts = normalizedUrl.split("/");
  const owner = parts[3];
  const repoName = parts[4];
  if (parts[2] !== "github.com" || !owner || !repoName) {
    return undefined; // Not a valid GitHub repo URL
  }
  return repoMetadataToGitHubApiUrl({
    group: owner,
    name: repoName
  });
};

/**
* Method to retrieve repo license by querying github api
*
Expand All @@ -3140,25 +3273,16 @@ export const toGitHubUrl = function (repoMetadata) {
* @return {String} SPDX license id
*/
export const getRepoLicense = async function (repoUrl, repoMetadata) {
if (!repoUrl) {
repoUrl = toGitHubUrl(repoMetadata);
}
if (repoUrl.startsWith("git://") && repoUrl.endsWith(".git")) {
repoUrl = repoUrl.replace("git://", "https://").slice(0, -4);
}
let apiUrl = toGitHubApiUrl(repoUrl, repoMetadata);
// Perform github lookups
if (repoUrl.indexOf("github.com") > -1) {
let apiUrl = repoUrl.replace(
"https://github.com",
"https://api.github.com/repos"
);
apiUrl += "/license";
if (apiUrl) {
let licenseUrl = apiUrl + "/license";
const headers = {};
if (process.env.GITHUB_TOKEN) {
headers["Authorization"] = "Bearer " + process.env.GITHUB_TOKEN;
}
try {
const res = await cdxgenAgent.get(apiUrl, {
const res = await cdxgenAgent.get(licenseUrl, {
responseType: "json",
headers: headers
});
Expand Down

0 comments on commit cd495d0

Please sign in to comment.