Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[jimple2cpg] Consistent & Recursive JAR/WAR Unpacking #3078

Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .envrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
use flake
badly-drawn-wizards marked this conversation as resolved.
Show resolved Hide resolved
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,4 @@ null
**/goastgen-macos
**/goastgen-macos-arm64
slices.json
.direnv
61 changes: 61 additions & 0 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions flake.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
badly-drawn-wizards marked this conversation as resolved.
Show resolved Hide resolved
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
utils.url = "github:gytis-ivaskevicius/flake-utils-plus";
};

outputs = { self, nixpkgs, utils, ... }@inputs:
utils.lib.eachDefaultSystem (system:
let
pkgs = import nixpkgs { inherit system; };
in
{
devShell = pkgs.mkShell {
buildInputs = with pkgs; [
sbt
jdk19
graphviz
];
};
});
}
Original file line number Diff line number Diff line change
@@ -1,60 +1,37 @@
package io.joern.jimple2cpg

import io.joern.jimple2cpg.passes.SootAstCreationPass
import io.joern.jimple2cpg.passes.AstCreationPass
import io.joern.jimple2cpg.util.ProgramHandlingUtil
import io.joern.jimple2cpg.util.ProgramHandlingUtil.{extractSourceFilesFromArchive, moveClassFiles}
import better.files.File
import io.joern.jimple2cpg.passes.{AstCreationPass, SootAstCreationPass}
import io.joern.jimple2cpg.util.ProgramHandlingUtil.{ClassFile, extractClassesInPackageLayout}
import io.joern.x2cpg.X2Cpg.withNewEmptyCpg
import io.joern.x2cpg.datastructures.Global
import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass}
import io.joern.x2cpg.{SourceFiles, X2CpgFrontend}
import io.shiftleft.codepropertygraph.Cpg
import org.apache.commons.io.FileUtils
import org.slf4j.LoggerFactory
import soot.options.Options
import soot.{G, PackManager, Scene}

import java.io.{File => JFile}
import java.nio.file.Paths
import org.apache.commons.io.FileUtils
import scala.jdk.CollectionConverters.{EnumerationHasAsScala, SeqHasAsJava}
import scala.language.postfixOps
import scala.util.Try

object Jimple2Cpg {
val language = "JAVA"

/** Formats the file name the way Soot refers to classes within a class path. e.g.
* /unrelated/paths/class/path/Foo.class => class.path.Foo
*
* @param filename
* the file name to transform.
* @return
* the correctly formatted class path.
*/
def getQualifiedClassPath(filename: String): String = {
val codePath = ProgramHandlingUtil.getUnpackingDir

val codeDir: String = if (codePath.toFile.isDirectory) {
codePath.toAbsolutePath.normalize.toString
} else {
Paths.get(codePath.toFile.getParentFile.getAbsolutePath).normalize.toString
}
filename
.replace(codeDir + JFile.separator, "")
.replace(".class", "")
.replace(JFile.separator, ".")
}

def apply(): Jimple2Cpg = new Jimple2Cpg()
}

class Jimple2Cpg extends X2CpgFrontend[Config] {

import Jimple2Cpg._
import Jimple2Cpg.*

private val logger = LoggerFactory.getLogger(classOf[Jimple2Cpg])

def sootLoadApk(input: String, framework: Option[String] = None): Unit = {
Options.v().set_process_dir(List(input).asJava)
def sootLoadApk(input: File, framework: Option[String] = None): Unit = {
Options.v().set_process_dir(List(input.canonicalPath).asJava)
framework match {
case Some(value) if value.nonEmpty => {
Options.v().set_src_prec(Options.src_prec_apk)
Expand All @@ -75,101 +52,77 @@ class Jimple2Cpg extends X2CpgFrontend[Config] {
Options.v().set_src_prec(Options.src_prec_class)
}

def sootLoadSource(input: String, ext: String): Unit = {
// Soot does not support loading a single class/jimple file by path, so we copy it to the temp dir first
// NOTE: Soot’s frontend for Java source files is outdated (it only partially supports Java versions up to 7) and is not very robust.
val src = new JFile(input)
val dst = new JFile(ProgramHandlingUtil.getUnpackingDir.toString, src.getName)
val prec = ext match {
case "jimple" => Options.src_prec_jimple
case _ => Options.src_prec_class
/** Loads all class files from archives or directories, recursively.
* @return
* The list of extracted class files and the components of their fully qualified class names
*/
private def loadClassFiles(src: File, tmpDir: File): List[ClassFile] = {
val archiveFileExtensions = Set(".jar", ".war", ".zip")
extractClassesInPackageLayout(
src,
tmpDir,
isClass = e => e.extension.contains(".class"),
badly-drawn-wizards marked this conversation as resolved.
Show resolved Hide resolved
isArchive = e => e.extension.exists(archiveFileExtensions.contains)
)
}
private def sootLoadRecursively(input: File, tmpDir: File, cpg: Cpg, config: Config): List[ClassFile] = {
Options.v().set_soot_classpath(tmpDir.canonicalPath)
Options.v().set_prepend_classpath(true)
val classFiles = loadClassFiles(input, tmpDir)
val fqcns = classFiles.flatMap(_.fqcn)
logger.info(s"Loading ${classFiles.size} program files")
logger.debug(s"Source files are: ${classFiles.map(_.file.canonicalPath)}")
fqcns.foreach { fqcn =>
Scene.v().addBasicClass(fqcn)
Scene.v().loadClassAndSupport(fqcn)
}
FileUtils.copyFile(src, dst)
Options.v().set_process_dir(List(ProgramHandlingUtil.getUnpackingDir.toString).asJava)
Options.v().set_src_prec(prec)
classFiles
}

def createCpg(config: Config): Try[Cpg] = {
val ret = withNewEmptyCpg(config.outputPath, config: Config) { (cpg, config) =>
val inputPath = new JFile(config.inputPath)

configureSoot()
new MetaDataPass(cpg, language, config.inputPath).createAndApply()
Options.v().set_dynamic_dir(config.dynamicDirs.toList.asJava)
Options.v().set_dynamic_package(config.dynamicPkgs.toList.asJava)
if (config.fullResolver) {
// full transitive resolution of all references
Options.v().set_full_resolver(true)
}
if (inputPath.isDirectory()) {
// make sure classpath is configured correctly
Options.v().set_soot_classpath(ProgramHandlingUtil.getUnpackingDir.toString)
Options.v().set_prepend_classpath(true)
val sourceFileExtensions = Set(".class", ".jimple")
val archiveFileExtensions = Set(".jar", ".war")
// Load source files and unpack archives if necessary
val sourceFileNames = loadSourceFiles(config.inputPath, sourceFileExtensions, archiveFileExtensions)
logger.info(s"Loading ${sourceFileNames.size} program files")
logger.debug(s"Source files are: $sourceFileNames")
loadClassesIntoSoot(sourceFileNames)
val astCreator = new AstCreationPass(sourceFileNames, cpg)
astCreator.createAndApply()
TypeNodePass
.withRegisteredTypes(astCreator.global.usedTypes.keys().asScala.toList, cpg)
.createAndApply()
} else {
val ext = config.inputPath.split("\\.").lastOption.getOrElse("")
ext match {
case "jar" | "zip" => sootLoadClass(config.inputPath)
case "apk" | "dex" => sootLoadApk(config.inputPath, config.android)
case "jimple" | "class" => sootLoadSource(config.inputPath, ext)
// case "war" => sootLoadClass(unpackPath/WEB-INF/classes)
case _ => {
logger.warn(s"Don't know how to handle input: $inputPath")
throw new RuntimeException(s"Unsupported input at ${config.inputPath}")
}
private def cpgApplyPasses(cpg: Cpg, config: Config, tmpDir: File): Unit = {
val input = File(config.inputPath)
configureSoot(config, tmpDir)
new MetaDataPass(cpg, language, config.inputPath).createAndApply()

val globalFromAstCreation: () => Global = input.extension match {
case Some(".apk" | ".dex") if input.isRegularFile =>
sootLoadApk(input, config.android)
{ () =>
val astCreator = SootAstCreationPass(cpg)
astCreator.createAndApply()
astCreator.global
}
case _ =>
val classFiles = sootLoadRecursively(input, tmpDir, cpg, config)
{ () =>
val astCreator = AstCreationPass(classFiles, cpg)
astCreator.createAndApply()
astCreator.global
}
logger.info("Loading classes to soot")
Scene.v().loadNecessaryClasses()
logger.info(s"Loaded ${Scene.v().getApplicationClasses().size()} classes")
val astCreator = new SootAstCreationPass(cpg)
astCreator.createAndApply()
TypeNodePass
.withRegisteredTypes(astCreator.global.usedTypes.keys().asScala.toList, cpg)
.createAndApply()
}

// Clear classes from Soot
G.reset()
}
clean()
ret
}

/** Load all source files from archive and/or source file types.
*/
private def loadSourceFiles(
sourceCodePath: String,
sourceFileExtensions: Set[String],
archiveFileExtensions: Set[String]
): List[String] = {
(
extractSourceFilesFromArchive(sourceCodePath, archiveFileExtensions) ++
moveClassFiles(SourceFiles.determine(sourceCodePath, sourceFileExtensions))
).distinct
logger.info("Loading classes to soot")
Scene.v().loadNecessaryClasses()
logger.info(s"Loaded ${Scene.v().getApplicationClasses().size()} classes")

val global = globalFromAstCreation()
TypeNodePass
.withRegisteredTypes(global.usedTypes.keys().asScala.toList, cpg)
.createAndApply()
}

private def loadClassesIntoSoot(sourceFileNames: List[String]): Unit = {
sourceFileNames
.map(getQualifiedClassPath)
.foreach { cp =>
Scene.v().addBasicClass(cp)
Scene.v().loadClassAndSupport(cp)
def createCpg(config: Config): Try[Cpg] =
try {
withNewEmptyCpg(config.outputPath, config: Config) { (cpg, config) =>
File.temporaryDirectory("jimple2cpg-").apply { tmpDir =>
cpgApplyPasses(cpg, config, tmpDir)
}
}
Scene.v().loadNecessaryClasses()
}
} finally {
G.reset()
}

private def configureSoot(): Unit = {
private def configureSoot(config: Config, outDir: File): Unit = {
// set application mode
Options.v().set_app(false)
Options.v().set_whole_program(false)
Expand All @@ -184,12 +137,14 @@ class Jimple2Cpg extends X2CpgFrontend[Config] {
Options.v().setPhaseOption("jb", "use-original-names:true")
// output jimple
Options.v().set_output_format(Options.output_format_jimple)
Options.v().set_output_dir(ProgramHandlingUtil.getUnpackingDir.toString)
}
Options.v().set_output_dir(outDir.canonicalPath)

private def clean(): Unit = {
G.reset()
ProgramHandlingUtil.clean()
}
Options.v().set_dynamic_dir(config.dynamicDirs.asJava)
Options.v().set_dynamic_package(config.dynamicPkgs.asJava)

if (config.fullResolver) {
// full transitive resolution of all references
Options.v().set_full_resolver(true)
}
}
}
Original file line number Diff line number Diff line change
@@ -1,31 +1,36 @@
package io.joern.jimple2cpg.passes

import better.files.File
import io.joern.jimple2cpg.Jimple2Cpg
import io.joern.jimple2cpg.util.ProgramHandlingUtil.ClassFile
import io.joern.x2cpg.datastructures.Global
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.passes.ConcurrentWriterCpgPass
import org.slf4j.LoggerFactory
import soot.Scene

/** Creates the AST layer from the given class files and stores all types in this pass's `global` member.
* @param classFiles
* List of class files and their fully qualified class names
* @param cpg
* The CPG to add to
*/
class AstCreationPass(filenames: List[String], cpg: Cpg) extends ConcurrentWriterCpgPass[String](cpg) {
class AstCreationPass(classFiles: List[ClassFile], cpg: Cpg) extends ConcurrentWriterCpgPass[ClassFile](cpg) {

val global: Global = new Global()
private val logger = LoggerFactory.getLogger(classOf[AstCreationPass])

override def generateParts(): Array[_ <: AnyRef] = filenames.toArray
override def generateParts(): Array[_ <: AnyRef] = classFiles.toArray

override def runOnPart(builder: DiffGraphBuilder, part: String): Unit = {
val qualifiedClassName = Jimple2Cpg.getQualifiedClassPath(part)
override def runOnPart(builder: DiffGraphBuilder, classFile: ClassFile): Unit = {
try {
val sootClass = Scene.v().loadClassAndSupport(qualifiedClassName)
val sootClass = Scene.v().loadClassAndSupport(classFile.fqcn.get)
sootClass.setApplicationClass()
val localDiff = new AstCreator(part, sootClass, global).createAst()
val localDiff = AstCreator(classFile.file.canonicalPath, sootClass, global).createAst()
builder.absorb(localDiff)
} catch {
case e: Exception =>
logger.warn(s"Cannot parse: $part ($qualifiedClassName)", e)
logger.warn(s"Exception on AST creation for ${classFile.file.canonicalPath}", e)
Iterator()
}
}
Expand Down
Loading