diff --git a/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/JavaSrc2Cpg.scala b/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/JavaSrc2Cpg.scala
index 373806a112a9..3ce68ebb5133 100644
--- a/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/JavaSrc2Cpg.scala
+++ b/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/JavaSrc2Cpg.scala
@@ -50,4 +50,23 @@ object JavaSrc2Cpg {
       new JavaTypeHintCallLinker(cpg)
     )
   }
+
+  /** Prints the name, description and current value (empty if unset) of every [[JavaSrcEnvVar]] to stdout. */
+  def showEnv(): Unit = {
+    JavaSrcEnvVar.values.foreach { envVar =>
+      val currentValue = Option(System.getenv(envVar.name)).getOrElse("")
+      println(s"${envVar.name}:")
+      println(s" Description : ${envVar.description}")
+      println(s" Current value: $currentValue")
+    }
+  }
+
+  /** Environment variables read by javasrc2cpg, each paired with a human-readable description. */
+  enum JavaSrcEnvVar(val name: String, val description: String) {
+    case JdkPath
+        extends JavaSrcEnvVar(
+          "JAVASRC_JDK_PATH",
+          "Path to the JDK home used for retrieving type information about builtin Java types."
+        )
+  }
 }
diff --git a/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/Main.scala b/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/Main.scala
index 9ed16f78fbe7..d18be7285655 100644
--- a/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/Main.scala
+++ b/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/Main.scala
@@ -14,7 +14,8 @@ final case class Config(
   delombokMode: Option[String] = None,
   enableTypeRecovery: Boolean = false,
   disableDummyTypes: Boolean = false,
-  jdkPath: Option[String] = None
+  jdkPath: Option[String] = None,
+  showEnv: Boolean = false
 ) extends X2CpgConfig[Config] {
   def withInferenceJarPaths(paths: Set[String]): Config = {
     copy(inferenceJarPaths = paths).withInheritedFields(this)
@@ -47,6 +48,10 @@ final case class Config(
   def withJdkPath(path: String): Config = {
     copy(jdkPath = Some(path)).withInheritedFields(this)
   }
+
+  def withShowEnv(value: Boolean): Config = {
+    copy(showEnv = value).withInheritedFields(this)
+  }
 }
 
 private object Frontend {
@@ -84,14 +89,32 @@ private object Frontend {
         .text("disable generation of dummy types during type recovery"),
       opt[String]("jdk-path")
         .action((path, c) => c.withJdkPath(path))
-        .text("JDK used for resolving builtin Java types. If not set, current classpath will be used")
+        .text("JDK used for resolving builtin Java types. If unset, JAVASRC_JDK_PATH or else system java.home is used"),
+      opt[Unit]("show-env")
+        .action((_, c) => c.withShowEnv(true))
+        .text("print information about environment variables used by javasrc2cpg and exit.")
     )
   }
 }
 
 object Main extends X2CpgMain(cmdLineParser, new JavaSrc2Cpg()) {
+
+  override def main(args: Array[String]): Unit = {
+    // TODO: This is a hack to allow users to use the "--show-env" option without having
+    // to specify an input argument. Clean this up when adding this option to more frontends.
+    if (args.contains("--show-env")) {
+      super.main(Array("--show-env", ""))
+    } else {
+      super.main(args)
+    }
+  }
+
   def run(config: Config, javasrc2Cpg: JavaSrc2Cpg): Unit = {
-    javasrc2Cpg.run(config)
+    if (config.showEnv) {
+      JavaSrc2Cpg.showEnv()
+    } else {
+      javasrc2Cpg.run(config)
+    }
   }
 
   def getCmdLineParser = cmdLineParser
diff --git a/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/passes/AstCreationPass.scala
index a474aeb6e796..ba5bce57ec03 100644
--- a/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/passes/AstCreationPass.scala
+++ b/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/passes/AstCreationPass.scala
@@ -29,6 +29,7 @@ import com.github.javaparser.symbolsolver.resolution.typesolvers.ClassLoaderType
 import java.net.URLClassLoader
 import com.github.javaparser.symbolsolver.resolution.typesolvers.ReflectionTypeSolver
 import io.joern.javasrc2cpg.typesolvers.CachingReflectionTypeSolver
+import io.joern.javasrc2cpg.JavaSrc2Cpg.JavaSrcEnvVar
 
 case class SourceDirectoryInfo(typeSolverSourceDirs: List[String], sourceFiles: List[SourceFileInfo])
 case class SplitDirectories(analysisSourceDir: String, typesSourceDir: String)
@@ -90,15 +91,28 @@ class AstCreationPass(config: Config, cpg: Cpg, preCreatedAsts: Option[SplitJpAs
 
     val combinedTypeSolver = new SimpleCombinedTypeSolver()
 
-    config.jdkPath match {
-      case Some(path) =>
-        val jdkJarTypeSolver = JdkJarTypeSolver.fromJdkPath(path)
-        combinedTypeSolver.add(jdkJarTypeSolver)
-
-      case None =>
-        combinedTypeSolver.add(new CachingReflectionTypeSolver())
+    val jdkPathFromEnvVar = Option(System.getenv(JavaSrcEnvVar.JdkPath.name))
+    val jdkPath = (config.jdkPath, jdkPathFromEnvVar) match {
+      case (None, None) =>
+        val javaHome = System.getProperty("java.home")
+        logger.info(
+          s"No explicit jdk-path set in config, so using system java.home for JDK type information: $javaHome"
+        )
+        javaHome
+
+      case (None, Some(jdkPath)) =>
+        logger.info(
+          s"Using JDK path from environment variable ${JavaSrcEnvVar.JdkPath.name} for JDK type information: $jdkPath"
+        )
+        jdkPath
+
+      case (Some(jdkPath), _) =>
+        logger.info(s"Using JDK path set with jdk-path option for JDK type information: $jdkPath")
+        jdkPath
     }
 
+    combinedTypeSolver.add(JdkJarTypeSolver.fromJdkPath(jdkPath))
+
     val sourceTypeSolver = EagerSourceTypeSolver(typesAsts, combinedTypeSolver)
     // The sourceTypeSolver will often be the fastest due to there being no possibility of encountering a SOE on lookup.
     combinedTypeSolver.prepend(sourceTypeSolver)
diff --git a/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/typesolvers/noncaching/JdkJarTypeSolver.scala b/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/typesolvers/noncaching/JdkJarTypeSolver.scala
index 5be3e0601e38..7cc7f4a7ca8e 100644
--- a/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/typesolvers/noncaching/JdkJarTypeSolver.scala
+++ b/joern-cli/frontends/javasrc2cpg/src/main/scala/io/joern/javasrc2cpg/typesolvers/noncaching/JdkJarTypeSolver.scala
@@ -21,6 +21,7 @@ import javassist.CtClass
 import com.github.javaparser.symbolsolver.javassistmodel.JavassistFactory
 import javassist.NotFoundException
 import javassist.ClassPath
+import io.shiftleft.semanticcpg.language.singleToEvalTypeAccessorsParameterOut
 
 class JdkJarTypeSolver private (jdkPath: String) extends TypeSolver {
 
@@ -29,11 +30,7 @@ class JdkJarTypeSolver private (jdkPath: String) extends TypeSolver {
   private var parent: Option[TypeSolver] = None
   private val classPool = new NonCachingClassPool()
 
-  /** JavaParser replaces '$' in class names for nested classes with '.', while the names in the javassist class pool do
-    * not. This means we need to keep a record of javaparser to classpool name for class pool lookups, e.g.
-    * foo.bar.Baz.Qux -> foo.bar.Baz$Qux
-    */
-  private val javaParserToClassPoolNames = mutable.Map[String, String]()
+  private val knownPackagePrefixes: mutable.Set[String] = mutable.Set.empty
 
   private type RefType = ResolvedReferenceTypeDeclaration
 
@@ -50,10 +47,24 @@ class JdkJarTypeSolver private (jdkPath: String) extends TypeSolver {
   }
 
   override def tryToSolveType(javaParserName: String): SymbolReference[ResolvedReferenceTypeDeclaration] = {
-    javaParserToClassPoolNames
-      .get(javaParserName)
-      .flatMap(lookupAndConvertClass)
-      .getOrElse(SymbolReference.unsolved(classOf[RefType]))
+    val packagePrefix = packagePrefixForJavaParserName(javaParserName)
+    if (knownPackagePrefixes.contains(packagePrefix)) {
+      lookupType(javaParserName)
+    } else {
+      SymbolReference.unsolved(classOf[RefType])
+    }
+  }
+
+  private def lookupType(javaParserName: String): SymbolReference[ResolvedReferenceTypeDeclaration] = {
+    val name = convertJavaParserNameToStandard(javaParserName)
+    Try(classPool.get(name)) match {
+      case Success(ctClass) =>
+        val refType = ctClassToRefType(ctClass)
+        refTypeToSymbolReference(refType)
+
+      case Failure(_) =>
+        SymbolReference.unsolved(classOf[RefType])
+    }
   }
 
   override def solveType(name: String): ResolvedReferenceTypeDeclaration = {
@@ -73,23 +84,6 @@ class JdkJarTypeSolver private (jdkPath: String) extends TypeSolver {
     SymbolReference.solved[RefType, RefType](refType)
   }
 
-  private def lookupAndConvertClass(name: String): Option[SymbolReference[RefType]] = {
-    Try(classPool.get(name)) match {
-      case Success(ctClass) =>
-        val refType = ctClassToRefType(ctClass)
-        val solvedSymbol = refTypeToSymbolReference(refType)
-        Some(solvedSymbol)
-
-      case Failure(_: NotFoundException) =>
-        logger.error(s"BUG! Could not find class $name in class pool. This is not supposed to be possible!")
-        None
-
-      case Failure(e) =>
-        logger.warn("Unexpected exception getting $name from class pool", e)
-        None
-    }
-  }
-
   private def addPathToClassPool(archivePath: String): Try[ClassPath] = {
     if (archivePath.isJarPath) {
       Try(classPool.appendClassPath(archivePath))
@@ -109,7 +103,7 @@ class JdkJarTypeSolver private (jdkPath: String) extends TypeSolver {
   def addArchives(archivePaths: Seq[String]): Unit = {
     archivePaths.foreach { archivePath =>
       addPathToClassPool(archivePath) match {
-        case Success(_) => registerKnownClassesForJar(archivePath)
+        case Success(_) => registerPackagesForJar(archivePath)
 
         case Failure(e) =>
           logger.warn(s"Could not load jar at path $archivePath", e.getMessage())
@@ -117,34 +111,21 @@ class JdkJarTypeSolver private (jdkPath: String) extends TypeSolver {
     }
   }
 
-  private def registerJarEntry(jarEntry: JarEntry): Unit = {
-    val entryName = jarEntry.getName()
-
-    if (!jarEntry.isDirectory && entryName.endsWith(ClassExtension)) {
-      val javaParserName = convertEntryPathToJavaParserName(entryName)
-      val classPoolName = convertEntryPathToClassPoolName(entryName)
-
-      // Avoid keeping 2 identical copies of the name.
-      if (javaParserName == classPoolName) {
-        javaParserToClassPoolNames.put(javaParserName, javaParserName)
-      } else {
-        javaParserToClassPoolNames.put(javaParserName, classPoolName)
-      }
-    }
-  }
-
-  private def registerKnownClassesForJar(jarPath: String): Unit = {
+  private def registerPackagesForJar(archivePath: String): Unit = {
+    val entryNameConverter = if (archivePath.isJarPath) packagePrefixForJarEntry else packagePrefixForJmodEntry
     try {
-      Using(new JarFile(jarPath)) { jarFile =>
-        jarFile
-          .entries()
-          .asIterator()
-          .asScala
-          .foreach(registerJarEntry)
+      // Using.resource rethrows failures; Using.apply would wrap them in a Try and the catch below would never run.
+      Using.resource(new JarFile(archivePath)) { jarFile =>
+        knownPackagePrefixes ++= jarFile
+          .entries()
+          .asIterator()
+          .asScala
+          .filter(entry => !entry.isDirectory() && entry.getName().endsWith(ClassExtension))
+          .map(entry => entryNameConverter(entry.getName()))
       }
     } catch {
       case ioException: IOException =>
-        logger.warn(s"Could register classes for jar/jmod at $jarPath", ioException.getMessage())
+        logger.warn(s"Could not register classes for archive at $archivePath", ioException.getMessage())
     }
   }
 }
@@ -162,28 +143,67 @@ object JdkJarTypeSolver {
 
   def fromJdkPath(jdkPath: String): JdkJarTypeSolver = {
     val jarPaths = SourceFiles.determine(jdkPath, Set(JarExtension, JmodExtension))
+    if (jarPaths.isEmpty) {
+      throw new IllegalArgumentException(s"No .jar or .jmod files found at JDK path ${jdkPath}")
+    }
     new JdkJarTypeSolver(jdkPath).withJars(jarPaths)
   }
 
-  /** Convert the JarEntry path into the qualified name format expected by JavaParser
-    *
-    * JarEntry format : foo/bar/Baz$Qux.class JavaParser format: foo.bar.Baz.Qux
+  /** Convert JavaParser class name foo.bar.qux.Baz to package prefix foo.bar. Only use first 2 parts since this is
+    * sufficient to determine whether a class has been registered in most cases and, if not, the failure is just a slow
+    * lookup.
     */
-  def convertEntryPathToJavaParserName(entryPath: String): String = {
-    convertEntryPathToClassPoolName(entryPath).replace('$', '.')
+  def packagePrefixForJavaParserName(className: String): String = {
+    className.split("\\.").take(2).mkString(".")
   }
 
-  /** Convert the JarEntry path into the qualified name format expected by Javassist ClassPools
-    *
-    * JarEntry format : foo/bar/Baz$Qux.class ClassPool format: foo.bar.Baz$Qux
+  /** Convert Jar entry name foo/bar/qux/Baz.class to package prefix foo.bar. Only use first 2 parts since this is
+    * sufficient to determine whether a class has been registered in most cases and, if not, the failure is just a slow
+    * lookup. The class extension is stripped first so that foo/Baz.class yields foo.Baz, matching the JavaParser name.
     */
-  def convertEntryPathToClassPoolName(entryPath: String): String = {
-    if (!entryPath.endsWith(ClassExtension)) {
-      throw new IllegalArgumentException(s"The entry path should end with $ClassExtension")
+  def packagePrefixForJarEntry(entryName: String): String = {
+    entryName.stripSuffix(ClassExtension).split("/").take(2).mkString(".")
+  }
+
+  /** Convert jmod entry name classes/foo/bar/qux/Baz.class to package prefix foo.bar. Only use first 2 parts since this
+    * is sufficient to determine whether a class has been registered in most cases and, if not, the failure is just a
+    * slow lookup.
+    */
+  def packagePrefixForJmodEntry(entryName: String): String = {
+    packagePrefixForJarEntry(entryName.stripPrefix(JmodClassPrefix))
+  }
+
+  /** A name is assumed to contain at least one subclass (e.g. ...Foo$Bar) if the last name part starts with a digit, or
+    * if the last 2 name parts start with capital letters. This heuristic is based on the class name format in the JDK
+    * jars, where names with subclasses have one of the forms:
+    *   - java.lang.ClassLoader$2
+    *   - java.lang.ClassLoader$NativeLibrary
+    *   - java.lang.ClassLoader$NativeLibrary$Unloader
+    */
+  private def namePartsContainSubclass(nameParts: Array[String]): Boolean = {
+    nameParts.takeRight(2) match {
+      case Array(secondLast, last) =>
+        // headOption guards against empty name parts, e.g. from a leading or doubled '.'.
+        val lastIsDigit = last.headOption.exists(_.isDigit)
+        lastIsDigit || (secondLast.headOption.exists(_.isUpper) && last.headOption.exists(_.isUpper))
+
+      case _ => false
     }
-    entryPath
-      .stripPrefix(JmodClassPrefix)
-      .stripSuffix(ClassExtension)
-      .replace('/', '.')
+  }
+
+  /** JavaParser replaces the `$` in nested class names with a `.`. This method converts the JavaParser names to the
+    * standard format by replacing the `.` between name parts that start with a capital letter or a digit with a `$`
+    * since the jdk classes follow the standard practice of capitalising the first letter in class names but not package
+    * names.
+    */
+  def convertJavaParserNameToStandard(className: String): String = {
+    className.split("\\.") match {
+      case nameParts if namePartsContainSubclass(nameParts) =>
+        val (packageParts, classParts) = nameParts.span(_.headOption.exists(_.isLower))
+        (packageParts :+ classParts.mkString("$")).mkString(".")
+
+      case _ => className
+    }
+  }
 
 }