Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace javasrc2cpg ReflectionTypeSolver with JdkJarTypeSolver at java home #3006

Merged
merged 10 commits into from
Jul 20, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,22 @@ object JavaSrc2Cpg {
new JavaTypeHintCallLinker(cpg)
)
}

/** Prints every environment variable declared in [[JavaSrcEnvVar]] to stdout: its name, its description and the
  * value currently returned by `System.getenv` for it (`<unset>` when the variable is not defined).
  */
def showEnv(): Unit = {
  JavaSrcEnvVar.values.foreach { envVar =>
    // System.getenv returns null for undefined variables, so wrap it in Option before defaulting.
    val currentValue = Option(System.getenv(envVar.name)).getOrElse("<unset>")
    println(s"${envVar.name}:")
    println(s" Description : ${envVar.description}")
    println(s" Current value: $currentValue")
  }
}

enum JavaSrcEnvVar(val name: String, val description: String) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is perhaps excessive for a single environment variable, but I also think that, if environment variables are going to be used for configuration, they should be documented somehow and easily discoverable. I plan to extend this idea to the rest of joern (I know at least php2cpg uses some environment variables as well), but put this here since it's currently limited to javasrc.

case JdkPath
extends JavaSrcEnvVar(
"JAVASRC_JDK_PATH",
"Path to the JDK home used for retrieving type information about builtin Java types."
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ final case class Config(
delombokMode: Option[String] = None,
enableTypeRecovery: Boolean = false,
disableDummyTypes: Boolean = false,
jdkPath: Option[String] = None
jdkPath: Option[String] = None,
showEnv: Boolean = false
) extends X2CpgConfig[Config] {
def withInferenceJarPaths(paths: Set[String]): Config = {
copy(inferenceJarPaths = paths).withInheritedFields(this)
Expand Down Expand Up @@ -47,6 +48,10 @@ final case class Config(
/** Returns a copy of this config whose `jdkPath` is `Some(path)`; the copy is passed through
  * `withInheritedFields(this)` before being returned.
  */
def withJdkPath(path: String): Config = {
  val updated = copy(jdkPath = Some(path))
  updated.withInheritedFields(this)
}

/** Returns a copy of this config whose `showEnv` flag is `value`; the copy is passed through
  * `withInheritedFields(this)` before being returned.
  */
def withShowEnv(value: Boolean): Config = {
  val updated = copy(showEnv = value)
  updated.withInheritedFields(this)
}
}

private object Frontend {
Expand Down Expand Up @@ -84,14 +89,32 @@ private object Frontend {
.text("disable generation of dummy types during type recovery"),
opt[String]("jdk-path")
.action((path, c) => c.withJdkPath(path))
.text("JDK used for resolving builtin Java types. If not set, current classpath will be used")
.text("JDK used for resolving builtin Java types. If not set, current classpath will be used"),
opt[Unit]("show-env")
.action((_, c) => c.withShowEnv(true))
.text("print information about environment variables used by javasrc2cpg and exit.")
)
}
}

object Main extends X2CpgMain(cmdLineParser, new JavaSrc2Cpg()) {

/** Entry point. When `--show-env` appears anywhere in `args`, the whole argument list is replaced by `--show-env`
  * plus a placeholder input directory; otherwise `args` is forwarded unchanged.
  */
override def main(args: Array[String]): Unit = {
  // TODO: This is a hack to allow users to use the "--show-env" option without having
  // to specify an input argument. Clean this up when adding this option to more frontends.
  val effectiveArgs =
    if (args.contains("--show-env")) Array("--show-env", "<input_dir_placeholder>")
    else args
  super.main(effectiveArgs)
}

/** Either prints the environment variable overview (when `config.showEnv` is set) or runs the frontend with
  * `config`. The frontend is only run in the non-`showEnv` case, and only once.
  */
def run(config: Config, javasrc2Cpg: JavaSrc2Cpg): Unit = {
  if (config.showEnv) {
    JavaSrc2Cpg.showEnv()
  } else {
    javasrc2Cpg.run(config)
  }
}

def getCmdLineParser = cmdLineParser
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import com.github.javaparser.symbolsolver.resolution.typesolvers.ClassLoaderType
import java.net.URLClassLoader
import com.github.javaparser.symbolsolver.resolution.typesolvers.ReflectionTypeSolver
import io.joern.javasrc2cpg.typesolvers.CachingReflectionTypeSolver
import io.joern.javasrc2cpg.JavaSrc2Cpg.JavaSrcEnvVar

case class SourceDirectoryInfo(typeSolverSourceDirs: List[String], sourceFiles: List[SourceFileInfo])
case class SplitDirectories(analysisSourceDir: String, typesSourceDir: String)
Expand Down Expand Up @@ -90,15 +91,28 @@ class AstCreationPass(config: Config, cpg: Cpg, preCreatedAsts: Option[SplitJpAs

val combinedTypeSolver = new SimpleCombinedTypeSolver()

config.jdkPath match {
case Some(path) =>
val jdkJarTypeSolver = JdkJarTypeSolver.fromJdkPath(path)
combinedTypeSolver.add(jdkJarTypeSolver)

case None =>
combinedTypeSolver.add(new CachingReflectionTypeSolver())
val jdkPathFromEnvVar = Option(System.getenv(JavaSrcEnvVar.JdkPath.name))
val jdkPath = (config.jdkPath, jdkPathFromEnvVar) match {
case (None, None) =>
val javaHome = System.getProperty("java.home")
logger.info(
s"No explicit jdk-path set in config, so using system java.home for JDK type information: $javaHome"
)
javaHome

case (None, Some(jdkPath)) =>
logger.info(
s"Using JDK path from environment variable ${JavaSrcEnvVar.JdkPath.name} for JDK type information: $jdkPath"
)
jdkPath

case (Some(jdkPath), _) =>
logger.info(s"Using JDK path set with jdk-path option for JDK type information: $jdkPath")
jdkPath
}

combinedTypeSolver.add(JdkJarTypeSolver.fromJdkPath(jdkPath))

val sourceTypeSolver = EagerSourceTypeSolver(typesAsts, combinedTypeSolver)
// The sourceTypeSolver will often be the fastest due to there being no possibility of encountering a SOE on lookup.
combinedTypeSolver.prepend(sourceTypeSolver)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import javassist.CtClass
import com.github.javaparser.symbolsolver.javassistmodel.JavassistFactory
import javassist.NotFoundException
import javassist.ClassPath
import io.shiftleft.semanticcpg.language.singleToEvalTypeAccessorsParameterOut

class JdkJarTypeSolver private (jdkPath: String) extends TypeSolver {

Expand All @@ -29,11 +30,7 @@ class JdkJarTypeSolver private (jdkPath: String) extends TypeSolver {
private var parent: Option[TypeSolver] = None
private val classPool = new NonCachingClassPool()

/** JavaParser replaces '$' in class names for nested classes with '.', while the names in the javassist class pool do
* not. This means we need to keep a record of javaparser to classpool name for class pool lookups, e.g.
* foo.bar.Baz.Qux -> foo.bar.Baz$Qux
*/
private val javaParserToClassPoolNames = mutable.Map[String, String]()
private val knownPackagePrefixes: mutable.Set[String] = mutable.Set.empty

private type RefType = ResolvedReferenceTypeDeclaration

Expand All @@ -50,10 +47,24 @@ class JdkJarTypeSolver private (jdkPath: String) extends TypeSolver {
}

/** Attempts to resolve `javaParserName` against the class pool.
  *
  * The lookup is only attempted when the package prefix of the name (as computed by
  * `packagePrefixForJavaParserName`) has been registered in `knownPackagePrefixes`; for any other name an unsolved
  * reference is returned without touching the class pool.
  */
override def tryToSolveType(javaParserName: String): SymbolReference[ResolvedReferenceTypeDeclaration] = {
  val packagePrefix = packagePrefixForJavaParserName(javaParserName)
  if (knownPackagePrefixes.contains(packagePrefix)) {
    lookupType(javaParserName)
  } else {
    SymbolReference.unsolved(classOf[RefType])
  }
}

/** Converts `javaParserName` with `convertJavaParserNameToStandard`, fetches the resulting name from the class pool
  * and wraps the class as a solved symbol reference. Any failure of the class pool lookup yields an unsolved
  * reference.
  */
private def lookupType(javaParserName: String): SymbolReference[ResolvedReferenceTypeDeclaration] = {
  val standardName = convertJavaParserNameToStandard(javaParserName)
  // Only the class pool lookup is guarded by Try; the conversion of a found class is not.
  Try(classPool.get(standardName)).toOption match {
    case Some(ctClass) => refTypeToSymbolReference(ctClassToRefType(ctClass))

    case None => SymbolReference.unsolved(classOf[RefType])
  }
}

override def solveType(name: String): ResolvedReferenceTypeDeclaration = {
Expand All @@ -73,23 +84,6 @@ class JdkJarTypeSolver private (jdkPath: String) extends TypeSolver {
SymbolReference.solved[RefType, RefType](refType)
}

/** Fetches `name` from the class pool and wraps the class as a solved symbol reference.
  *
  * Returns `None` when the lookup fails: a `NotFoundException` is logged as an error, any other failure is logged
  * as a warning together with the exception.
  */
private def lookupAndConvertClass(name: String): Option[SymbolReference[RefType]] = {
  Try(classPool.get(name)) match {
    case Success(ctClass) =>
      val refType      = ctClassToRefType(ctClass)
      val solvedSymbol = refTypeToSymbolReference(refType)
      Some(solvedSymbol)

    case Failure(_: NotFoundException) =>
      logger.error(s"BUG! Could not find class $name in class pool. This is not supposed to be possible!")
      None

    case Failure(e) =>
      // The `s` interpolator is required here, otherwise "$name" is logged literally.
      logger.warn(s"Unexpected exception getting $name from class pool", e)
      None
  }
}

private def addPathToClassPool(archivePath: String): Try[ClassPath] = {
if (archivePath.isJarPath) {
Try(classPool.appendClassPath(archivePath))
Expand All @@ -109,42 +103,29 @@ class JdkJarTypeSolver private (jdkPath: String) extends TypeSolver {
/** Adds each archive in `archivePaths` to the class pool and, on success, registers the package prefixes of the
  * classes it contains. An archive that cannot be added is skipped with a warning.
  */
def addArchives(archivePaths: Seq[String]): Unit = {
  archivePaths.foreach { archivePath =>
    addPathToClassPool(archivePath) match {
      case Success(_) => registerPackagesForJar(archivePath)

      case Failure(e) =>
        // The reason is embedded in the message itself: passed as a separate argument without a placeholder in
        // the message, it would presumably be dropped by the logger (assumes SLF4J-style formatting - confirm).
        logger.warn(s"Could not load jar at path $archivePath: ${e.getMessage()}")
    }
  }
}

/** Records the JavaParser-name to class-pool-name mapping for `jarEntry` in `javaParserToClassPoolNames`.
  * Directory entries and entries that do not end with `ClassExtension` are ignored.
  */
private def registerJarEntry(jarEntry: JarEntry): Unit = {
  val entryName   = jarEntry.getName()
  val isClassFile = !jarEntry.isDirectory && entryName.endsWith(ClassExtension)

  if (isClassFile) {
    val javaParserName = convertEntryPathToJavaParserName(entryName)
    val classPoolName  = convertEntryPathToClassPoolName(entryName)

    // Avoid keeping 2 identical copies of the name: reuse the key instance as the value when both names agree.
    val storedName = if (javaParserName == classPoolName) javaParserName else classPoolName
    javaParserToClassPoolNames.put(javaParserName, storedName)
  }
}

/** Adds the package prefix of every class file entry in the archive at `archivePath` to `knownPackagePrefixes`.
  *
  * Jar entries are converted with `packagePrefixForJarEntry`, entries of any other archive with
  * `packagePrefixForJmodEntry`. Directory entries and entries not ending with `ClassExtension` are ignored.
  * If the archive cannot be opened or read, a warning is logged and the prefixes registered so far are kept.
  */
private def registerPackagesForJar(archivePath: String): Unit = {
  val entryNameConverter = if (archivePath.isJarPath) packagePrefixForJarEntry else packagePrefixForJmodEntry

  // Using(...) captures non-fatal exceptions (including IOException) in the returned Try instead of throwing
  // them, so the outcome has to be inspected explicitly; a surrounding try/catch would never observe them.
  val registration = Using(new JarFile(archivePath)) { jarFile =>
    knownPackagePrefixes ++=
      jarFile
        .entries()
        .asIterator()
        .asScala
        .filter(entry => !entry.isDirectory() && entry.getName().endsWith(ClassExtension))
        .map(entry => entryNameConverter(entry.getName()))
  }

  registration.failed.foreach { exception =>
    logger.warn(s"Could not register classes for archive at $archivePath: ${exception.getMessage()}")
  }
}
}
Expand All @@ -162,28 +143,67 @@ object JdkJarTypeSolver {

/** Builds a [[JdkJarTypeSolver]] from the `.jar` and `.jmod` archives that `SourceFiles.determine` finds for
  * `jdkPath`.
  *
  * @throws IllegalArgumentException
  *   if no such archive is found.
  */
def fromJdkPath(jdkPath: String): JdkJarTypeSolver = {
  SourceFiles.determine(jdkPath, Set(JarExtension, JmodExtension)) match {
    case archives if archives.isEmpty =>
      throw new IllegalArgumentException(s"No .jar or .jmod files found at JDK path ${jdkPath}")

    case archives =>
      new JdkJarTypeSolver(jdkPath).withJars(archives)
  }
}

/** Convert the JarEntry path into the qualified name format expected by JavaParser
*
* JarEntry format : foo/bar/Baz$Qux.class JavaParser format: foo.bar.Baz.Qux
/** Convert a JavaParser class name such as foo.bar.qux.Baz to the package prefix foo.bar.
  *
  * Only the first 2 name parts are kept, since this is sufficient to determine whether a class has been registered
  * in most cases and, if not, the failure is just a slow lookup.
  */
def packagePrefixForJavaParserName(className: String): String = {
  val nameParts = className.split('.')
  nameParts.iterator.take(2).mkString(".")
}

/** Convert the JarEntry path into the qualified name format expected by Javassist ClassPools
*
* JarEntry format : foo/bar/Baz$Qux.class ClassPool format: foo.bar.Baz$Qux
/** Convert Jar entry name foo/bar/qux/Baz.class to package prefix foo.bar. Only use first 2 parts since this is
  * sufficient to determine whether a class has been registered in most cases and, if not, the failure is just a slow
  * lookup.
  */
def convertEntryPathToClassPoolName(entryPath: String): String = {
if (!entryPath.endsWith(ClassExtension)) {
throw new IllegalArgumentException(s"The entry path should end with $ClassExtension")
def packagePrefixForJarEntry(entryName: String): String = {
  // Keep at most the first two '/'-separated path segments and join them with '.'.
  val pathSegments = entryName.split('/')
  pathSegments.iterator.take(2).mkString(".")
}

/** Convert a jmod entry name such as classes/foo/bar/qux/Baz.class to the package prefix foo.bar.
  *
  * The `JmodClassPrefix` is stripped from the entry name and the remainder is handled like a jar entry, i.e. only
  * the first 2 parts are kept. This is sufficient to determine whether a class has been registered in most cases
  * and, if not, the failure is just a slow lookup.
  */
def packagePrefixForJmodEntry(entryName: String): String = {
  val jarStyleEntryName = entryName.stripPrefix(JmodClassPrefix)
  packagePrefixForJarEntry(jarStyleEntryName)
}

/** A name is assumed to contain at least one subclass (e.g. ...Foo$Bar) if the last name part starts with a digit, or
  * if the last 2 name parts start with capital letters. This heuristic is based on the class name format in the JDK
  * jars, where names with subclasses have one of the forms:
  *   - java.lang.ClassLoader$2
  *   - java.lang.ClassLoader$NativeLibrary
  *   - java.lang.ClassLoader$NativeLibrary$Unloader
  *
  * Names with fewer than 2 parts never match. Empty name parts are treated as starting with neither a digit nor a
  * capital letter.
  */
private def namePartsContainSubclass(nameParts: Array[String]): Boolean = {
  // headOption instead of head: an empty part (e.g. from "foo..Bar") must not throw.
  def startsWith(part: String, test: Char => Boolean): Boolean = part.headOption.exists(test)

  nameParts.takeRight(2) match {
    case Array(secondLast, last) =>
      startsWith(last, _.isDigit) || (startsWith(secondLast, _.isUpper) && startsWith(last, _.isUpper))

    case _ => false
  }
}

/** JavaParser replaces the `$` in nested class names with a `.`. This method converts the JavaParser names to the
  * standard format by replacing the `.` between name parts that start with a capital letter or a digit with a `$`
  * since the jdk classes follow the standard practice of capitalising the first letter in class names but not package
  * names.
  *
  * Names that do not look like nested class names (see `namePartsContainSubclass`) are returned unchanged.
  *
  * @param className
  *   the JavaParser name, e.g. java.lang.ClassLoader.NativeLibrary
  * @return
  *   the standard name, e.g. java.lang.ClassLoader$NativeLibrary
  */
def convertJavaParserNameToStandard(className: String): String = {
  // Split on the literal '.' character. String.split(".") interprets "." as a regex matching every character and
  // therefore always returns an empty array, which would make this method a no-op.
  className.split('.') match {
    case nameParts if namePartsContainSubclass(nameParts) =>
      // Parts starting with a lower-case letter form the package; everything else is a (nested) class name.
      val (packageParts, classNames) = nameParts.partition(_.headOption.exists(_.isLower))
      val nestedClassName            = classNames.mkString("$")
      // Without a package there is nothing to prefix, so avoid producing a leading '.'.
      if (packageParts.isEmpty) nestedClassName
      else s"${packageParts.mkString(".")}.$nestedClassName"

    case _ => className
  }
}
}