Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[jimple2cpg] Consistent & Recursive JAR/WAR Unpacking #3078

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,11 @@ null
**/goastgen-macos
**/goastgen-macos-arm64
slices.json

##############
# Nix flakes #
##############
.envrc
flake.nix
flake.lock
.direnv
Original file line number Diff line number Diff line change
@@ -1,60 +1,37 @@
package io.joern.jimple2cpg

import io.joern.jimple2cpg.passes.SootAstCreationPass
import io.joern.jimple2cpg.passes.AstCreationPass
import io.joern.jimple2cpg.util.ProgramHandlingUtil
import io.joern.jimple2cpg.util.ProgramHandlingUtil.{extractSourceFilesFromArchive, moveClassFiles}
import better.files.File
import io.joern.jimple2cpg.passes.{AstCreationPass, SootAstCreationPass}
import io.joern.jimple2cpg.util.ProgramHandlingUtil.{ClassFile, extractClassesInPackageLayout}
import io.joern.x2cpg.X2Cpg.withNewEmptyCpg
import io.joern.x2cpg.datastructures.Global
import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass}
import io.joern.x2cpg.{SourceFiles, X2CpgFrontend}
import io.shiftleft.codepropertygraph.Cpg
import org.apache.commons.io.FileUtils
import org.slf4j.LoggerFactory
import soot.options.Options
import soot.{G, PackManager, Scene}

import java.io.{File => JFile}
import java.nio.file.Paths
import org.apache.commons.io.FileUtils
import scala.jdk.CollectionConverters.{EnumerationHasAsScala, SeqHasAsJava}
import scala.language.postfixOps
import scala.util.Try

object Jimple2Cpg {

  /** Language identifier handed to the `MetaDataPass` when a CPG is created. */
  val language = "JAVA"

  /** Converts a class file path into the dotted name Soot uses to refer to a class on its class path, e.g.
    * `/unrelated/paths/class/path/Foo.class` becomes `class.path.Foo`.
    *
    * The prefix that is stripped is derived from `ProgramHandlingUtil.getUnpackingDir`: the normalized absolute path
    * of that location if it is a directory, otherwise the normalized absolute path of its parent.
    *
    * @param filename
    *   the file name to transform.
    * @return
    *   `filename` with the unpacking directory prefix and every `.class` occurrence removed, and with file separators
    *   replaced by dots.
    */
  def getQualifiedClassPath(filename: String): String = {
    val unpackingDir = ProgramHandlingUtil.getUnpackingDir

    // Fall back to the parent when the unpacking location is not a directory.
    val rootDir: String =
      if (!unpackingDir.toFile.isDirectory)
        Paths.get(unpackingDir.toFile.getParentFile.getAbsolutePath).normalize.toString
      else
        unpackingDir.toAbsolutePath.normalize.toString

    val relativePath     = filename.replace(rootDir + JFile.separator, "")
    val withoutExtension = relativePath.replace(".class", "")
    withoutExtension.replace(JFile.separator, ".")
  }

  /** Creates a new frontend instance. */
  def apply(): Jimple2Cpg = new Jimple2Cpg()
}

class Jimple2Cpg extends X2CpgFrontend[Config] {

import Jimple2Cpg._
import Jimple2Cpg.*

private val logger = LoggerFactory.getLogger(classOf[Jimple2Cpg])

def sootLoadApk(input: String, framework: Option[String] = None): Unit = {
Options.v().set_process_dir(List(input).asJava)
def sootLoadApk(input: File, framework: Option[String] = None): Unit = {
Options.v().set_process_dir(List(input.canonicalPath).asJava)
framework match {
case Some(value) if value.nonEmpty => {
Options.v().set_src_prec(Options.src_prec_apk)
Expand All @@ -70,106 +47,91 @@ class Jimple2Cpg extends X2CpgFrontend[Config] {
Options.v().setPhaseOption("jb", "use-original-names:false")
}

def sootLoadClass(inputDir: String): Unit = {
Options.v().set_process_dir(List(inputDir).asJava)
Options.v().set_src_prec(Options.src_prec_class)
/** Recursively gathers class files from `src`, which may be an archive or a directory, by delegating to
  * `extractClassesInPackageLayout`. Entries with the `.class` extension are treated as classes; entries with a
  * `.jar`, `.war` or `.zip` extension are treated as archives.
  *
  * @param src
  *   The file or directory to search.
  * @param tmpDir
  *   The directory handed to `extractClassesInPackageLayout` as the extraction target.
  * @return
  *   The list of extracted class files whose package path could be extracted, placed on that package path relative to
  *   [[tmpDir]]
  */
private def loadClassFiles(src: File, tmpDir: File): List[ClassFile] = {
  // Extensions (including the leading dot) that mark an entry as an archive.
  val unpackableExtensions = Set(".jar", ".war", ".zip")
  extractClassesInPackageLayout(
    src,
    tmpDir,
    isClass = entry => entry.extension.exists(ext => ext == ".class"),
    isArchive = entry => entry.extension.exists(ext => unpackableExtensions.contains(ext))
  )
}

def sootLoadSource(input: String, ext: String): Unit = {
// Soot does not support loading single class/jimple file using path, so we move it to temp dir first
// NOTE: Soot’s frontend for Java source files is outdated (only partially supports Java version up to 7) and not very robust.
val src = new JFile(input)
val dst = new JFile(ProgramHandlingUtil.getUnpackingDir.toString, src.getName)
val prec = ext match {
case "jimple" => Options.src_prec_jimple
case _ => Options.src_prec_class
/** Extract all class files found, place them in their package layout and load them into soot.
* @param input
* The file/directory to traverse for class files.
* @param tmpDir
* The directory to place the class files in their package layout; it is also set as the soot classpath
* @return
* The class files that were found under `input`
*/
private def sootLoadRecursively(input: File, tmpDir: File): List[ClassFile] = {
Options.v().set_soot_classpath(tmpDir.canonicalPath)
Options.v().set_prepend_classpath(true)
val classFiles = loadClassFiles(input, tmpDir)
val fullyQualifiedClassNames = classFiles.flatMap(_.fullyQualifiedClassName)
logger.info(s"Loading ${classFiles.size} program files")
logger.debug(s"Source files are: ${classFiles.map(_.file.canonicalPath)}")
fullyQualifiedClassNames.foreach { fqcn =>
Scene.v().addBasicClass(fqcn)
Scene.v().loadClassAndSupport(fqcn)
}
FileUtils.copyFile(src, dst)
Options.v().set_process_dir(List(ProgramHandlingUtil.getUnpackingDir.toString).asJava)
Options.v().set_src_prec(prec)
classFiles
}

def createCpg(config: Config): Try[Cpg] = {
val ret = withNewEmptyCpg(config.outputPath, config: Config) { (cpg, config) =>
val inputPath = new JFile(config.inputPath)

configureSoot()
new MetaDataPass(cpg, language, config.inputPath).createAndApply()
Options.v().set_dynamic_dir(config.dynamicDirs.toList.asJava)
Options.v().set_dynamic_package(config.dynamicPkgs.toList.asJava)
if (config.fullResolver) {
// full transitive resolution of all references
Options.v().set_full_resolver(true)
}
if (inputPath.isDirectory()) {
// make sure classpath is configured correctly
Options.v().set_soot_classpath(ProgramHandlingUtil.getUnpackingDir.toString)
Options.v().set_prepend_classpath(true)
val sourceFileExtensions = Set(".class", ".jimple")
val archiveFileExtensions = Set(".jar", ".war")
// Load source files and unpack archives if necessary
val sourceFileNames = loadSourceFiles(config.inputPath, sourceFileExtensions, archiveFileExtensions)
logger.info(s"Loading ${sourceFileNames.size} program files")
logger.debug(s"Source files are: $sourceFileNames")
loadClassesIntoSoot(sourceFileNames)
val astCreator = new AstCreationPass(sourceFileNames, cpg)
astCreator.createAndApply()
TypeNodePass
.withRegisteredTypes(astCreator.global.usedTypes.keys().asScala.toList, cpg)
.createAndApply()
} else {
val ext = config.inputPath.split("\\.").lastOption.getOrElse("")
ext match {
case "jar" | "zip" => sootLoadClass(config.inputPath)
case "apk" | "dex" => sootLoadApk(config.inputPath, config.android)
case "jimple" | "class" => sootLoadSource(config.inputPath, ext)
// case "war" => sootLoadClass(unpackPath/WEB-INF/classes)
case _ => {
logger.warn(s"Don't know how to handle input: $inputPath")
throw new RuntimeException(s"Unsupported input at ${config.inputPath}")
}
/** Apply the soot passes
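* @param cpg
* The CPG the passes are applied to
* @param config
* The frontend configuration; its `inputPath` determines what is loaded into soot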
* @param tmpDir
* A temporary directory that will be used as the classpath for extracted class files
*/
private def cpgApplyPasses(cpg: Cpg, config: Config, tmpDir: File): Unit = {
val input = File(config.inputPath)
configureSoot(config, tmpDir)
new MetaDataPass(cpg, language, config.inputPath).createAndApply()

val globalFromAstCreation: () => Global = input.extension match {
case Some(".apk" | ".dex") if input.isRegularFile =>
sootLoadApk(input, config.android)
{ () =>
val astCreator = SootAstCreationPass(cpg)
astCreator.createAndApply()
astCreator.global
}
case _ =>
val classFiles = sootLoadRecursively(input, tmpDir)
{ () =>
val astCreator = AstCreationPass(classFiles, cpg)
astCreator.createAndApply()
astCreator.global
}
logger.info("Loading classes to soot")
Scene.v().loadNecessaryClasses()
logger.info(s"Loaded ${Scene.v().getApplicationClasses().size()} classes")
val astCreator = new SootAstCreationPass(cpg)
astCreator.createAndApply()
TypeNodePass
.withRegisteredTypes(astCreator.global.usedTypes.keys().asScala.toList, cpg)
.createAndApply()
}

// Clear classes from Soot
G.reset()
}
clean()
ret
}

/** Load all source files from archive and/or source file types.
*/
private def loadSourceFiles(
sourceCodePath: String,
sourceFileExtensions: Set[String],
archiveFileExtensions: Set[String]
): List[String] = {
(
extractSourceFilesFromArchive(sourceCodePath, archiveFileExtensions) ++
moveClassFiles(SourceFiles.determine(sourceCodePath, sourceFileExtensions))
).distinct
logger.info("Loading classes to soot")
Scene.v().loadNecessaryClasses()
logger.info(s"Loaded ${Scene.v().getApplicationClasses().size()} classes")

val global = globalFromAstCreation()
TypeNodePass
.withRegisteredTypes(global.usedTypes.keys().asScala.toList, cpg)
.createAndApply()
}

private def loadClassesIntoSoot(sourceFileNames: List[String]): Unit = {
sourceFileNames
.map(getQualifiedClassPath)
.foreach { cp =>
Scene.v().addBasicClass(cp)
Scene.v().loadClassAndSupport(cp)
override def createCpg(config: Config): Try[Cpg] =
try {
withNewEmptyCpg(config.outputPath, config: Config) { (cpg, config) =>
File.temporaryDirectory("jimple2cpg-").apply { tmpDir =>
cpgApplyPasses(cpg, config, tmpDir)
}
}
Scene.v().loadNecessaryClasses()
}
} finally {
G.reset()
}

private def configureSoot(): Unit = {
private def configureSoot(config: Config, outDir: File): Unit = {
// set application mode
Options.v().set_app(false)
Options.v().set_whole_program(false)
Expand All @@ -184,12 +146,14 @@ class Jimple2Cpg extends X2CpgFrontend[Config] {
Options.v().setPhaseOption("jb", "use-original-names:true")
// output jimple
Options.v().set_output_format(Options.output_format_jimple)
Options.v().set_output_dir(ProgramHandlingUtil.getUnpackingDir.toString)
}
Options.v().set_output_dir(outDir.canonicalPath)

private def clean(): Unit = {
G.reset()
ProgramHandlingUtil.clean()
}
Options.v().set_dynamic_dir(config.dynamicDirs.asJava)
Options.v().set_dynamic_package(config.dynamicPkgs.asJava)

if (config.fullResolver) {
// full transitive resolution of all references
Options.v().set_full_resolver(true)
}
}
}
Original file line number Diff line number Diff line change
@@ -1,31 +1,36 @@
package io.joern.jimple2cpg.passes

import better.files.File
import io.joern.jimple2cpg.Jimple2Cpg
import io.joern.jimple2cpg.util.ProgramHandlingUtil.ClassFile
import io.joern.x2cpg.datastructures.Global
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.passes.ConcurrentWriterCpgPass
import org.slf4j.LoggerFactory
import soot.Scene

/** Creates the AST layer from the given class files and collects the types used along the way in [[global]].
* @param classFiles
* List of class files and their fully qualified class names
* @param cpg
* The CPG to add to
*/
class AstCreationPass(filenames: List[String], cpg: Cpg) extends ConcurrentWriterCpgPass[String](cpg) {
class AstCreationPass(classFiles: List[ClassFile], cpg: Cpg) extends ConcurrentWriterCpgPass[ClassFile](cpg) {

val global: Global = new Global()
private val logger = LoggerFactory.getLogger(classOf[AstCreationPass])

override def generateParts(): Array[_ <: AnyRef] = filenames.toArray
override def generateParts(): Array[_ <: AnyRef] = classFiles.toArray

override def runOnPart(builder: DiffGraphBuilder, part: String): Unit = {
val qualifiedClassName = Jimple2Cpg.getQualifiedClassPath(part)
override def runOnPart(builder: DiffGraphBuilder, classFile: ClassFile): Unit = {
try {
val sootClass = Scene.v().loadClassAndSupport(qualifiedClassName)
val sootClass = Scene.v().loadClassAndSupport(classFile.fullyQualifiedClassName.get)
sootClass.setApplicationClass()
val localDiff = new AstCreator(part, sootClass, global).createAst()
val localDiff = AstCreator(classFile.file.canonicalPath, sootClass, global).createAst()
builder.absorb(localDiff)
} catch {
case e: Exception =>
logger.warn(s"Cannot parse: $part ($qualifiedClassName)", e)
logger.warn(s"Exception on AST creation for ${classFile.file.canonicalPath}", e)
Iterator()
}
}
Expand Down
Loading