From 2854777465382adbd8c9b0b06fba5c8015d45957 Mon Sep 17 00:00:00 2001 From: Reuben Steenekamp Date: Tue, 4 Jul 2023 11:56:34 +0200 Subject: [PATCH 1/9] Add nix flake devShell. --- .envrc | 1 + .gitignore | 1 + flake.lock | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ flake.nix | 20 ++++++++++++++++++ 4 files changed, 83 insertions(+) create mode 100644 .envrc create mode 100644 flake.lock create mode 100644 flake.nix diff --git a/.envrc b/.envrc new file mode 100644 index 000000000000..3550a30f2de3 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.gitignore b/.gitignore index e6015a976522..0f5fef418082 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,4 @@ null **/goastgen-macos **/goastgen-macos-arm64 slices.json +.direnv diff --git a/flake.lock b/flake.lock new file mode 100644 index 000000000000..7a097185e1a5 --- /dev/null +++ b/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "locked": { + "lastModified": 1644229661, + "narHash": "sha256-1YdnJAsNy69bpcjuoKdOYQX0YxZBiCYZo4Twxerqv7k=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "3cecb5b042f7f209c56ffd8371b2711a290ec797", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1688231357, + "narHash": "sha256-ZOn16X5jZ6X5ror58gOJAxPfFLAQhZJ6nOUeS4tfFwo=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "645ff62e09d294a30de823cb568e9c6d68e92606", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs", + "utils": "utils" + } + }, + "utils": { + "inputs": { + "flake-utils": "flake-utils" + }, + "locked": { + "lastModified": 1657226504, + "narHash": "sha256-GIYNjuq4mJlFgqKsZ+YrgzWm0IpA4axA3MCrdKYj7gs=", + "owner": "gytis-ivaskevicius", + "repo": "flake-utils-plus", + "rev": "2bf0f91643c2e5ae38c1b26893ac2927ac9bd82a", + 
"type": "github" + }, + "original": { + "owner": "gytis-ivaskevicius", + "repo": "flake-utils-plus", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 000000000000..701760d2619f --- /dev/null +++ b/flake.nix @@ -0,0 +1,20 @@ +{ + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + utils.url = "github:gytis-ivaskevicius/flake-utils-plus"; + }; + + outputs = { self, nixpkgs, utils, ... }@inputs: + utils.lib.eachDefaultSystem (system: + let + pkgs = import nixpkgs { inherit system; }; + in + { + devShell = pkgs.mkShell { + buildInputs = with pkgs; [ + sbt + jdk19 + ]; + }; + }); +} From 85b757c16b55bac02b604260893322db694b8070 Mon Sep 17 00:00:00 2001 From: Reuben Steenekamp Date: Wed, 5 Jul 2023 11:04:19 +0200 Subject: [PATCH 2/9] Add graphviz to flake. --- flake.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/flake.nix b/flake.nix index 701760d2619f..64de0c74f1fe 100644 --- a/flake.nix +++ b/flake.nix @@ -14,6 +14,7 @@ buildInputs = with pkgs; [ sbt jdk19 + graphviz ]; }; }); From 97ec306a176bed66f9b91e34650630468a715877 Mon Sep 17 00:00:00 2001 From: Reuben Steenekamp Date: Wed, 5 Jul 2023 11:02:08 +0200 Subject: [PATCH 3/9] Remove irrelevant argument --- .../jimple2cpg/util/ProgramHandlingUtil.scala | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala index ba53fc5c406b..a182fb8540c9 100644 --- a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala +++ b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala @@ -85,24 +85,18 @@ object ProgramHandlingUtil { * @param sourceCodePath * The project root path to unpack to. 
*/ - def unzipArchive(zf: ZipFile, sourceCodePath: String): List[String] = { + def unzipArchive(zf: ZipFile): List[String] = { val zipTempDir = Files.createTempDirectory("plume-unzip-") try { Using.resource(zf) { (zip: ZipFile) => // Copy zipped files across - return moveClassFiles( + moveClassFiles( zip .entries() .asScala .filter(f => !f.isDirectory && f.getName.endsWith(".class")) .flatMap(entry => { - val sourceCodePathFile = new File(sourceCodePath) - // Handle the case if the input source code path is an archive itself - val destFile = if (sourceCodePathFile.isDirectory) { - new File(zipTempDir.toAbsolutePath.toString + File.separator + entry.getName) - } else { - new File(zipTempDir.toAbsolutePath.toString + File.separator + entry.getName) - } + val destFile = new File(zipTempDir.toAbsolutePath.toString + File.separator + entry.getName) // dirName accounts for nested directories as a result of JAR package structure val dirName = destFile.getAbsolutePath .substring(0, destFile.getAbsolutePath.lastIndexOf(File.separator)) @@ -147,7 +141,7 @@ object ProgramHandlingUtil { } else { SourceFiles.determine(sourceCodeDir, archiveFileExtensions) } - archives.flatMap { x => unzipArchive(new ZipFile(x), sourceCodeDir) } + archives.flatMap { x => unzipArchive(new ZipFile(x)) } } /** Removes all files in the temporary unpacking directory. From a7185b1ff97410871a8231cbe0712472d9a7f2f6 Mon Sep 17 00:00:00 2001 From: Reuben Steenekamp Date: Tue, 11 Jul 2023 08:52:01 +0200 Subject: [PATCH 4/9] Add consistent recursive jar unpacking. Previously jimple2cpg would have two distinct code paths when importing a directory containing a jar (which would unpack the jar) compared to importing a jar directly (which would load it with soot). 
This change does the following - Always use the unpacking behaviour for jars - Remove jimple from the source files being processed (It can be added back if needed downstream) - Keep the .apk/.dex behaviour if imported directly (but not if contained in a directory). This maintains the existing behaviour. Some remaining work is to: - add docstrings - add unit tests for recursive unpacking - add unit tests for .apk/.dex (maybe) --- .../io/joern/jimple2cpg/Jimple2Cpg.scala | 199 ++++++--------- .../jimple2cpg/passes/AstCreationPass.scala | 19 +- .../jimple2cpg/util/ProgramHandlingUtil.scala | 230 ++++++++---------- .../jimple2cpg/unpacking/JarUnpacking.scala | 3 +- 4 files changed, 197 insertions(+), 254 deletions(-) diff --git a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/Jimple2Cpg.scala b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/Jimple2Cpg.scala index 48f1d95ac743..7f56a31ff878 100644 --- a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/Jimple2Cpg.scala +++ b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/Jimple2Cpg.scala @@ -1,20 +1,19 @@ package io.joern.jimple2cpg -import io.joern.jimple2cpg.passes.SootAstCreationPass -import io.joern.jimple2cpg.passes.AstCreationPass -import io.joern.jimple2cpg.util.ProgramHandlingUtil -import io.joern.jimple2cpg.util.ProgramHandlingUtil.{extractSourceFilesFromArchive, moveClassFiles} +import better.files.File +import io.joern.jimple2cpg.passes.{AstCreationPass, SootAstCreationPass} +import io.joern.jimple2cpg.util.ProgramHandlingUtil.{ClassFile, extractClassesInPackageLayout} import io.joern.x2cpg.X2Cpg.withNewEmptyCpg +import io.joern.x2cpg.datastructures.Global import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass} import io.joern.x2cpg.{SourceFiles, X2CpgFrontend} import io.shiftleft.codepropertygraph.Cpg +import org.apache.commons.io.FileUtils import org.slf4j.LoggerFactory import soot.options.Options import soot.{G, 
PackManager, Scene} -import java.io.{File => JFile} import java.nio.file.Paths -import org.apache.commons.io.FileUtils import scala.jdk.CollectionConverters.{EnumerationHasAsScala, SeqHasAsJava} import scala.language.postfixOps import scala.util.Try @@ -22,39 +21,17 @@ import scala.util.Try object Jimple2Cpg { val language = "JAVA" - /** Formats the file name the way Soot refers to classes within a class path. e.g. - * /unrelated/paths/class/path/Foo.class => class.path.Foo - * - * @param filename - * the file name to transform. - * @return - * the correctly formatted class path. - */ - def getQualifiedClassPath(filename: String): String = { - val codePath = ProgramHandlingUtil.getUnpackingDir - - val codeDir: String = if (codePath.toFile.isDirectory) { - codePath.toAbsolutePath.normalize.toString - } else { - Paths.get(codePath.toFile.getParentFile.getAbsolutePath).normalize.toString - } - filename - .replace(codeDir + JFile.separator, "") - .replace(".class", "") - .replace(JFile.separator, ".") - } - def apply(): Jimple2Cpg = new Jimple2Cpg() } class Jimple2Cpg extends X2CpgFrontend[Config] { - import Jimple2Cpg._ + import Jimple2Cpg.* private val logger = LoggerFactory.getLogger(classOf[Jimple2Cpg]) - def sootLoadApk(input: String, framework: Option[String] = None): Unit = { - Options.v().set_process_dir(List(input).asJava) + def sootLoadApk(input: File, framework: Option[String] = None): Unit = { + Options.v().set_process_dir(List(input.canonicalPath).asJava) framework match { case Some(value) if value.nonEmpty => { Options.v().set_src_prec(Options.src_prec_apk) @@ -75,101 +52,77 @@ class Jimple2Cpg extends X2CpgFrontend[Config] { Options.v().set_src_prec(Options.src_prec_class) } - def sootLoadSource(input: String, ext: String): Unit = { - // Soot does not support loading single class/jimple file using path, so we move it to temp dir first - // NOTE: Soot’s frontend for Java source files is outdated (only partially supports Java version up to 7) and not very 
robust. - val src = new JFile(input) - val dst = new JFile(ProgramHandlingUtil.getUnpackingDir.toString, src.getName) - val prec = ext match { - case "jimple" => Options.src_prec_jimple - case _ => Options.src_prec_class + /** Load all class files from archives or directories recursively + * @return + * The list of extracted class files and the components of their fully qualified class names + */ + private def loadClassFiles(src: File, tmpDir: File): List[ClassFile] = { + val archiveFileExtensions = Set(".jar", ".war", ".zip") + extractClassesInPackageLayout( + src, + tmpDir, + isClass = e => e.extension.contains(".class"), + isArchive = e => e.extension.exists(archiveFileExtensions.contains) + ) + } + private def sootLoadRecursively(input: File, tmpDir: File, cpg: Cpg, config: Config): List[ClassFile] = { + Options.v().set_soot_classpath(tmpDir.canonicalPath) + Options.v().set_prepend_classpath(true) + val classFiles = loadClassFiles(input, tmpDir) + val fqcns = classFiles.flatMap(_.fqcn) + logger.info(s"Loading ${classFiles.size} program files") + logger.debug(s"Source files are: ${classFiles.map(_.file.canonicalPath)}") + fqcns.foreach { fqcn => + Scene.v().addBasicClass(fqcn) + Scene.v().loadClassAndSupport(fqcn) } - FileUtils.copyFile(src, dst) - Options.v().set_process_dir(List(ProgramHandlingUtil.getUnpackingDir.toString).asJava) - Options.v().set_src_prec(prec) + classFiles } - def createCpg(config: Config): Try[Cpg] = { - val ret = withNewEmptyCpg(config.outputPath, config: Config) { (cpg, config) => - val inputPath = new JFile(config.inputPath) - - configureSoot() - new MetaDataPass(cpg, language, config.inputPath).createAndApply() - Options.v().set_dynamic_dir(config.dynamicDirs.toList.asJava) - Options.v().set_dynamic_package(config.dynamicPkgs.toList.asJava) - if (config.fullResolver) { - // full transitive resolution of all references - Options.v().set_full_resolver(true) - } - if (inputPath.isDirectory()) { - // make sure classpath is configured 
correctly - Options.v().set_soot_classpath(ProgramHandlingUtil.getUnpackingDir.toString) - Options.v().set_prepend_classpath(true) - val sourceFileExtensions = Set(".class", ".jimple") - val archiveFileExtensions = Set(".jar", ".war") - // Load source files and unpack archives if necessary - val sourceFileNames = loadSourceFiles(config.inputPath, sourceFileExtensions, archiveFileExtensions) - logger.info(s"Loading ${sourceFileNames.size} program files") - logger.debug(s"Source files are: $sourceFileNames") - loadClassesIntoSoot(sourceFileNames) - val astCreator = new AstCreationPass(sourceFileNames, cpg) - astCreator.createAndApply() - TypeNodePass - .withRegisteredTypes(astCreator.global.usedTypes.keys().asScala.toList, cpg) - .createAndApply() - } else { - val ext = config.inputPath.split("\\.").lastOption.getOrElse("") - ext match { - case "jar" | "zip" => sootLoadClass(config.inputPath) - case "apk" | "dex" => sootLoadApk(config.inputPath, config.android) - case "jimple" | "class" => sootLoadSource(config.inputPath, ext) - // case "war" => sootLoadClass(unpackPath/WEB-INF/classes) - case _ => { - logger.warn(s"Don't know how to handle input: $inputPath") - throw new RuntimeException(s"Unsupported input at ${config.inputPath}") - } + private def cpgApplyPasses(cpg: Cpg, config: Config, tmpDir: File): Unit = { + val input = File(config.inputPath) + configureSoot(config, tmpDir) + new MetaDataPass(cpg, language, config.inputPath).createAndApply() + + val globalFromAstCreation: () => Global = input.extension match { + case Some(".apk" | ".dex") if input.isRegularFile => + sootLoadApk(input, config.android) + { () => + val astCreator = SootAstCreationPass(cpg) + astCreator.createAndApply() + astCreator.global + } + case _ => + val classFiles = sootLoadRecursively(input, tmpDir, cpg, config) + { () => + val astCreator = AstCreationPass(classFiles, cpg) + astCreator.createAndApply() + astCreator.global } - logger.info("Loading classes to soot") - 
Scene.v().loadNecessaryClasses() - logger.info(s"Loaded ${Scene.v().getApplicationClasses().size()} classes") - val astCreator = new SootAstCreationPass(cpg) - astCreator.createAndApply() - TypeNodePass - .withRegisteredTypes(astCreator.global.usedTypes.keys().asScala.toList, cpg) - .createAndApply() - } - - // Clear classes from Soot - G.reset() } - clean() - ret - } - /** Load all source files from archive and/or source file types. - */ - private def loadSourceFiles( - sourceCodePath: String, - sourceFileExtensions: Set[String], - archiveFileExtensions: Set[String] - ): List[String] = { - ( - extractSourceFilesFromArchive(sourceCodePath, archiveFileExtensions) ++ - moveClassFiles(SourceFiles.determine(sourceCodePath, sourceFileExtensions)) - ).distinct + logger.info("Loading classes to soot") + Scene.v().loadNecessaryClasses() + logger.info(s"Loaded ${Scene.v().getApplicationClasses().size()} classes") + + val global = globalFromAstCreation() + TypeNodePass + .withRegisteredTypes(global.usedTypes.keys().asScala.toList, cpg) + .createAndApply() } - private def loadClassesIntoSoot(sourceFileNames: List[String]): Unit = { - sourceFileNames - .map(getQualifiedClassPath) - .foreach { cp => - Scene.v().addBasicClass(cp) - Scene.v().loadClassAndSupport(cp) + def createCpg(config: Config): Try[Cpg] = + try { + withNewEmptyCpg(config.outputPath, config: Config) { (cpg, config) => + File.temporaryDirectory("jimple2cpg-").apply { tmpDir => + cpgApplyPasses(cpg, config, tmpDir) + } } - Scene.v().loadNecessaryClasses() - } + } finally { + G.reset() + } - private def configureSoot(): Unit = { + private def configureSoot(config: Config, outDir: File): Unit = { // set application mode Options.v().set_app(false) Options.v().set_whole_program(false) @@ -184,12 +137,14 @@ class Jimple2Cpg extends X2CpgFrontend[Config] { Options.v().setPhaseOption("jb", "use-original-names:true") // output jimple Options.v().set_output_format(Options.output_format_jimple) - 
Options.v().set_output_dir(ProgramHandlingUtil.getUnpackingDir.toString) - } + Options.v().set_output_dir(outDir.canonicalPath) - private def clean(): Unit = { - G.reset() - ProgramHandlingUtil.clean() - } + Options.v().set_dynamic_dir(config.dynamicDirs.asJava) + Options.v().set_dynamic_package(config.dynamicPkgs.asJava) + if (config.fullResolver) { + // full transitive resolution of all references + Options.v().set_full_resolver(true) + } + } } diff --git a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/passes/AstCreationPass.scala index 12e2a367615e..0b84f2682b4b 100644 --- a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/passes/AstCreationPass.scala +++ b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/passes/AstCreationPass.scala @@ -1,6 +1,8 @@ package io.joern.jimple2cpg.passes +import better.files.File import io.joern.jimple2cpg.Jimple2Cpg +import io.joern.jimple2cpg.util.ProgramHandlingUtil.ClassFile import io.joern.x2cpg.datastructures.Global import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.passes.ConcurrentWriterCpgPass @@ -8,24 +10,27 @@ import org.slf4j.LoggerFactory import soot.Scene /** Creates the AST layer from the given class file and stores all types in the given global parameter. 
+ * @param classFiles + * List of class files and their fully qualified class names + * @param cpg + * The CPG to add to */ -class AstCreationPass(filenames: List[String], cpg: Cpg) extends ConcurrentWriterCpgPass[String](cpg) { +class AstCreationPass(classFiles: List[ClassFile], cpg: Cpg) extends ConcurrentWriterCpgPass[ClassFile](cpg) { val global: Global = new Global() private val logger = LoggerFactory.getLogger(classOf[AstCreationPass]) - override def generateParts(): Array[_ <: AnyRef] = filenames.toArray + override def generateParts(): Array[_ <: AnyRef] = classFiles.toArray - override def runOnPart(builder: DiffGraphBuilder, part: String): Unit = { - val qualifiedClassName = Jimple2Cpg.getQualifiedClassPath(part) + override def runOnPart(builder: DiffGraphBuilder, classFile: ClassFile): Unit = { try { - val sootClass = Scene.v().loadClassAndSupport(qualifiedClassName) + val sootClass = Scene.v().loadClassAndSupport(classFile.fqcn.get) sootClass.setApplicationClass() - val localDiff = new AstCreator(part, sootClass, global).createAst() + val localDiff = AstCreator(classFile.file.canonicalPath, sootClass, global).createAst() builder.absorb(localDiff) } catch { case e: Exception => - logger.warn(s"Cannot parse: $part ($qualifiedClassName)", e) + logger.warn(s"Exception on AST creation for ${classFile.file.canonicalPath}", e) Iterator() } } diff --git a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala index a182fb8540c9..b001fb82779c 100644 --- a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala +++ b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala @@ -1,16 +1,16 @@ package io.joern.jimple2cpg.util +import better.files.* import io.joern.x2cpg.SourceFiles -import org.apache.commons.io.FileUtils import 
org.objectweb.asm.ClassReader.SKIP_CODE import org.objectweb.asm.{ClassReader, ClassVisitor, Opcodes} import org.slf4j.LoggerFactory -import java.io.{File, FileInputStream} -import java.nio.file.{Files, Path, Paths, StandardCopyOption} -import java.util.zip.ZipFile +import java.io.FileInputStream +import java.nio.file.Path +import java.util.zip.ZipEntry import scala.jdk.CollectionConverters.EnumerationHasAsScala -import scala.util.Using +import scala.util.{Failure, Left, Success, Try, Using} /** Responsible for handling JAR unpacking and handling the temporary build directory. */ @@ -18,135 +18,119 @@ object ProgramHandlingUtil { private val logger = LoggerFactory.getLogger(ProgramHandlingUtil.getClass) - /** The temporary directory used to unpack class files to. - */ - private var TEMP_DIR: Option[Path] = None + sealed class Entry(entry: Either[File, ZipEntry]) { - logger.debug(s"Using temporary folder at $TEMP_DIR") - - /** Returns the temporary directory used to unpack and analyze projects in. This allows us to lazily create the - * unpacking directory. - * @return - * the path pointing to the unpacking directory. - */ - def getUnpackingDir: Path = - TEMP_DIR match { - case None => - val p = Files.createTempDirectory("joern-") - TEMP_DIR = Some(p) - p - case Some(dir) => dir - } - - /** Inspects class files and moves them to the temp directory based on their package path. - * - * @param files - * the class files to move. - * @return - * the list of class files at their new locations. 
- */ - def moveClassFiles(files: List[String]): List[String] = { - var destPath: Option[String] = None + def this(file: File) = this(Left(file)) + def this(entry: ZipEntry) = this(Right(entry)) + private def file: File = entry.fold(identity, e => File(e.getName)) + def name: String = file.name + def fullExtension: Option[String] = file.extension(includeAll = true) + def extension: Option[String] = file.extension + def isDirectory: Boolean = entry.fold(_.isDirectory, _.isDirectory) + def maybeRegularFile(): Boolean = entry.fold(_.isRegularFile, !_.isDirectory) + } - sealed class ClassPathVisitor extends ClassVisitor(Opcodes.ASM9) { - override def visit( - version: Int, - access: Int, - name: String, - signature: String, - superName: String, - interfaces: Array[String] - ): Unit = { - destPath = Some(getUnpackingDir.toAbsolutePath.toString + File.separator + name + ".class") - } + private def unfoldArchives[A](src: File, emitOrUnpack: File => Either[A, List[File]]): IterableOnce[A] = { + emitOrUnpack(src) match { + case Left(a) => Seq(a) + case Right(disposeFiles) => disposeFiles.flatMap(x => unfoldArchives(x, emitOrUnpack)) } + } + private def extractClassesToTmp( + src: File, + tmpDir: File, + isArchive: Entry => Boolean, + isSource: Entry => Boolean + ): IterableOnce[ClassFile] = { - files.flatMap { f => - Using.resource(new FileInputStream(f)) { fis => - val cr = new ClassReader(fis) - val rootVisitor = new ClassPathVisitor() - cr.accept(rootVisitor, SKIP_CODE) + def shouldExtract(e: Entry) = e.maybeRegularFile() && (isArchive(e) || isSource(e)) + unfoldArchives( + src, + { + case f if isSource(Entry(f)) => + Left(ClassFile(f)) + case f if f.isDirectory() => + val files = f.listRecursively.filter(!_.isDirectory).toList + Right(files) + case f if isArchive(Entry(f)) => + val xTmp = File.newTemporaryDirectory("extract-archive-", parent = Some(tmpDir)) + Right(Try(f.unzipTo(xTmp, e => shouldExtract(Entry(f)))) match { + case Success(dir) => List(dir) + case 
Failure(e) => + logger.warn(s"Failed to extract archive", e) + List.empty + }) + case _ => + Right(List.empty) } - destPath match { - case Some(destPath) => - val dstFile = new File(destPath) - dstFile.mkdirs() - Files.copy(Paths.get(f), dstFile.toPath, StandardCopyOption.REPLACE_EXISTING) - Some(dstFile.getAbsolutePath) - case None => None - } - } + ) } - /** Unzips a ZIP file into a sequence of files. All files unpacked are deleted at the end of CPG construction. - * - * @param zf - * The ZIP file to extract. - * @param sourceCodePath - * The project root path to unpack to. - */ - def unzipArchive(zf: ZipFile): List[String] = { - val zipTempDir = Files.createTempDirectory("plume-unzip-") - try { - Using.resource(zf) { (zip: ZipFile) => - // Copy zipped files across - moveClassFiles( - zip - .entries() - .asScala - .filter(f => !f.isDirectory && f.getName.endsWith(".class")) - .flatMap(entry => { - val destFile = new File(zipTempDir.toAbsolutePath.toString + File.separator + entry.getName) - // dirName accounts for nested directories as a result of JAR package structure - val dirName = destFile.getAbsolutePath - .substring(0, destFile.getAbsolutePath.lastIndexOf(File.separator)) - // Create directory path - new File(dirName).mkdirs() - try { - if (destFile.exists()) destFile.delete() - Using.resource(zip.getInputStream(entry)) { input => - Files.copy(input, destFile.toPath) - } - destFile.deleteOnExit() - Option(destFile.getAbsolutePath) - } catch { - case e: Exception => - logger - .warn( - s"Encountered an error while extracting entry ${entry.getName} from archive ${zip.getName}.", - e - ) - Option.empty - } - }) - .toList - ) + object ClassFile { + def getPackagePathFromByteCode(fis: FileInputStream): Option[String] = { + val cr = new ClassReader(fis) + var path: Option[String] = None + sealed class ClassNameVisitor extends ClassVisitor(Opcodes.ASM9) { + override def visit( + version: Int, + access: Int, + name: String, + signature: String, + superName: String, 
+ interfaces: Array[String] + ): Unit = { + path = Some(name) + } } - } catch { - case e: Exception => throw new RuntimeException(s"Error extracting files from archive at ${zf.getName}", e) - } finally { - FileUtils.deleteDirectory(zipTempDir.toFile) + val rootVisitor = new ClassNameVisitor() + cr.accept(rootVisitor, SKIP_CODE) + path } - } - /** Retrieve parseable files from archive types. - */ - def extractSourceFilesFromArchive(sourceCodeDir: String, archiveFileExtensions: Set[String]): List[String] = { - val archives = - if ( - new File(sourceCodeDir).isFile && - archiveFileExtensions.map(sourceCodeDir.endsWith).reduce((a, b) => a && b) - ) { - List(sourceCodeDir) - } else { - SourceFiles.determine(sourceCodeDir, archiveFileExtensions) - } - archives.flatMap { x => unzipArchive(new ZipFile(x)) } + def getPackagePathFromByteCode(file: File): Option[String] = + Try(file.fileInputStream.apply(getPackagePathFromByteCode)) + .recover { + case e: Throwable => { + logger.warn(s"Error reading class file ${file.canonicalPath}", e) + None + } + } + .getOrElse(None) } + sealed class ClassFile(val file: File, val packagePath: Option[String]) { + def this(file: File) = this(file, ClassFile.getPackagePathFromByteCode(file)) + + // Test that the path separator is always unix + val components: Option[Array[String]] = packagePath.map(_.split("/")) - /** Removes all files in the temporary unpacking directory. 
- */ - def clean(): Unit = { - FileUtils.deleteDirectory(getUnpackingDir.toFile) + val fqcn: Option[String] = components.map(_.mkString(".")) + def moveToPackageLayoutIn(destDir: File): Option[ClassFile] = + packagePath + .map { path => + val destClass = File(destDir, path + ".class") + if (destClass.exists()) { + logger.warn(s"Overwriting class file: ${destClass.path.toAbsolutePath}") + } + destClass.parent.createDirectories(); + ClassFile(file.moveTo(destClass)(File.CopyOptions(overwrite = true)), packagePath) + } + .orElse { + logger.warn(s"Missing package path for ${file.canonicalPath}. Failed to move to ${destDir.canonicalPath}") + None + } } + def extractClassesInPackageLayout( + src: File, + destDir: File, + isClass: Entry => Boolean, + isArchive: Entry => Boolean + ): List[ClassFile] = + File + .temporaryDirectory("extract-classes-") + .apply(tmpDir => + extractClassesToTmp(src, tmpDir, isArchive, isClass).iterator + .flatMap(_.moveToPackageLayoutIn(destDir)) + .toList + ) + } diff --git a/joern-cli/frontends/jimple2cpg/src/test/scala/io/joern/jimple2cpg/unpacking/JarUnpacking.scala b/joern-cli/frontends/jimple2cpg/src/test/scala/io/joern/jimple2cpg/unpacking/JarUnpacking.scala index 54e59c4cb2ea..9c8eb4e325ee 100644 --- a/joern-cli/frontends/jimple2cpg/src/test/scala/io/joern/jimple2cpg/unpacking/JarUnpacking.scala +++ b/joern-cli/frontends/jimple2cpg/src/test/scala/io/joern/jimple2cpg/unpacking/JarUnpacking.scala @@ -28,7 +28,7 @@ class JarUnpacking extends AnyWordSpec with Matchers with BeforeAndAfterAll { } } - "should extract files and clean up temp directory" in { + "'resources/unpacking' should contain 'HelloWorld.jar'" in { val targetDir = ProjectRoot.relativise("joern-cli/frontends/jimple2cpg/src/test/resources/unpacking") Files .walk(Path.of(targetDir)) @@ -36,7 +36,6 @@ class JarUnpacking extends AnyWordSpec with Matchers with BeforeAndAfterAll { .filter(f => f.isFile && f.getName.contains(".jar")) .map(_.getName) .toArray shouldBe 
Array("HelloWorld.jar") - ProgramHandlingUtil.getUnpackingDir.toFile.length() shouldBe 0 } "should reflect the correct package order" in { From b7c3ca79975daad55a5756e1485934113bc576f6 Mon Sep 17 00:00:00 2001 From: Reuben Steenekamp Date: Tue, 11 Jul 2023 10:18:03 +0200 Subject: [PATCH 5/9] Remove flake files --- .envrc | 1 - .gitignore | 7 +++++++ flake.lock | 61 ------------------------------------------------------ flake.nix | 21 ------------------- 4 files changed, 7 insertions(+), 83 deletions(-) delete mode 100644 .envrc delete mode 100644 flake.lock delete mode 100644 flake.nix diff --git a/.envrc b/.envrc deleted file mode 100644 index 3550a30f2de3..000000000000 --- a/.envrc +++ /dev/null @@ -1 +0,0 @@ -use flake diff --git a/.gitignore b/.gitignore index 0f5fef418082..65cebbce527b 100644 --- a/.gitignore +++ b/.gitignore @@ -40,4 +40,11 @@ null **/goastgen-macos **/goastgen-macos-arm64 slices.json + +############## +# Nix flakes # +############## +.envrc +flake.nix +flake.lock .direnv diff --git a/flake.lock b/flake.lock deleted file mode 100644 index 7a097185e1a5..000000000000 --- a/flake.lock +++ /dev/null @@ -1,61 +0,0 @@ -{ - "nodes": { - "flake-utils": { - "locked": { - "lastModified": 1644229661, - "narHash": "sha256-1YdnJAsNy69bpcjuoKdOYQX0YxZBiCYZo4Twxerqv7k=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "3cecb5b042f7f209c56ffd8371b2711a290ec797", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "nixpkgs": { - "locked": { - "lastModified": 1688231357, - "narHash": "sha256-ZOn16X5jZ6X5ror58gOJAxPfFLAQhZJ6nOUeS4tfFwo=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "645ff62e09d294a30de823cb568e9c6d68e92606", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixos-unstable", - "repo": "nixpkgs", - "type": "github" - } - }, - "root": { - "inputs": { - "nixpkgs": "nixpkgs", - "utils": "utils" - } - }, - "utils": { - "inputs": { - "flake-utils": 
"flake-utils" - }, - "locked": { - "lastModified": 1657226504, - "narHash": "sha256-GIYNjuq4mJlFgqKsZ+YrgzWm0IpA4axA3MCrdKYj7gs=", - "owner": "gytis-ivaskevicius", - "repo": "flake-utils-plus", - "rev": "2bf0f91643c2e5ae38c1b26893ac2927ac9bd82a", - "type": "github" - }, - "original": { - "owner": "gytis-ivaskevicius", - "repo": "flake-utils-plus", - "type": "github" - } - } - }, - "root": "root", - "version": 7 -} diff --git a/flake.nix b/flake.nix deleted file mode 100644 index 64de0c74f1fe..000000000000 --- a/flake.nix +++ /dev/null @@ -1,21 +0,0 @@ -{ - inputs = { - nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; - utils.url = "github:gytis-ivaskevicius/flake-utils-plus"; - }; - - outputs = { self, nixpkgs, utils, ... }@inputs: - utils.lib.eachDefaultSystem (system: - let - pkgs = import nixpkgs { inherit system; }; - in - { - devShell = pkgs.mkShell { - buildInputs = with pkgs; [ - sbt - jdk19 - graphviz - ]; - }; - }); -} From 03075f83252e4be74a32ed987dd77fbe000cdec2 Mon Sep 17 00:00:00 2001 From: Reuben Steenekamp Date: Tue, 11 Jul 2023 10:40:50 +0200 Subject: [PATCH 6/9] PR review changes --- .../io/joern/jimple2cpg/Jimple2Cpg.scala | 8 +++---- .../jimple2cpg/passes/AstCreationPass.scala | 2 +- .../jimple2cpg/util/ProgramHandlingUtil.scala | 21 ++++++++++--------- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/Jimple2Cpg.scala b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/Jimple2Cpg.scala index 7f56a31ff878..c60a75e062a2 100644 --- a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/Jimple2Cpg.scala +++ b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/Jimple2Cpg.scala @@ -61,18 +61,18 @@ class Jimple2Cpg extends X2CpgFrontend[Config] { extractClassesInPackageLayout( src, tmpDir, - isClass = e => e.extension.contains(".class"), + isClass = e => e.extension.exists(_ == ".class"), isArchive = e => 
e.extension.exists(archiveFileExtensions.contains) ) } private def sootLoadRecursively(input: File, tmpDir: File, cpg: Cpg, config: Config): List[ClassFile] = { Options.v().set_soot_classpath(tmpDir.canonicalPath) Options.v().set_prepend_classpath(true) - val classFiles = loadClassFiles(input, tmpDir) - val fqcns = classFiles.flatMap(_.fqcn) + val classFiles = loadClassFiles(input, tmpDir) + val fullyQualifiedClassNames = classFiles.flatMap(_.fullyQualifiedClassName) logger.info(s"Loading ${classFiles.size} program files") logger.debug(s"Source files are: ${classFiles.map(_.file.canonicalPath)}") - fqcns.foreach { fqcn => + fullyQualifiedClassNames.foreach { fqcn => Scene.v().addBasicClass(fqcn) Scene.v().loadClassAndSupport(fqcn) } diff --git a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/passes/AstCreationPass.scala b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/passes/AstCreationPass.scala index 0b84f2682b4b..bef446579379 100644 --- a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/passes/AstCreationPass.scala +++ b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/passes/AstCreationPass.scala @@ -24,7 +24,7 @@ class AstCreationPass(classFiles: List[ClassFile], cpg: Cpg) extends ConcurrentW override def runOnPart(builder: DiffGraphBuilder, classFile: ClassFile): Unit = { try { - val sootClass = Scene.v().loadClassAndSupport(classFile.fqcn.get) + val sootClass = Scene.v().loadClassAndSupport(classFile.fullyQualifiedClassName.get) sootClass.setApplicationClass() val localDiff = AstCreator(classFile.file.canonicalPath, sootClass, global).createAst() builder.absorb(localDiff) diff --git a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala index b001fb82779c..bdf43ec59df5 100644 --- 
a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala +++ b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala @@ -50,16 +50,17 @@ object ProgramHandlingUtil { case f if isSource(Entry(f)) => Left(ClassFile(f)) case f if f.isDirectory() => - val files = f.listRecursively.filter(!_.isDirectory).toList + val files = f.listRecursively.filterNot(_.isDirectory).toList Right(files) case f if isArchive(Entry(f)) => val xTmp = File.newTemporaryDirectory("extract-archive-", parent = Some(tmpDir)) - Right(Try(f.unzipTo(xTmp, e => shouldExtract(Entry(f)))) match { + val unzipDirs = Try(f.unzipTo(xTmp, e => shouldExtract(Entry(f)))) match { case Success(dir) => List(dir) case Failure(e) => logger.warn(s"Failed to extract archive", e) List.empty - }) + } + Right(unzipDirs) case _ => Right(List.empty) } @@ -68,9 +69,9 @@ object ProgramHandlingUtil { object ClassFile { def getPackagePathFromByteCode(fis: FileInputStream): Option[String] = { - val cr = new ClassReader(fis) - var path: Option[String] = None + val cr = new ClassReader(fis) sealed class ClassNameVisitor extends ClassVisitor(Opcodes.ASM9) { + var path: Option[String] = None override def visit( version: Int, access: Int, @@ -84,14 +85,14 @@ object ProgramHandlingUtil { } val rootVisitor = new ClassNameVisitor() cr.accept(rootVisitor, SKIP_CODE) - path + rootVisitor.path } def getPackagePathFromByteCode(file: File): Option[String] = Try(file.fileInputStream.apply(getPackagePathFromByteCode)) .recover { case e: Throwable => { - logger.warn(s"Error reading class file ${file.canonicalPath}", e) + logger.error(s"Error reading class file ${file.canonicalPath}", e) None } } @@ -100,14 +101,14 @@ object ProgramHandlingUtil { sealed class ClassFile(val file: File, val packagePath: Option[String]) { def this(file: File) = this(file, ClassFile.getPackagePathFromByteCode(file)) - // Test that the path separator is always unix + // TODO: Test 
that the path separator is always unix val components: Option[Array[String]] = packagePath.map(_.split("/")) - val fqcn: Option[String] = components.map(_.mkString(".")) + val fullyQualifiedClassName: Option[String] = components.map(_.mkString(".")) def moveToPackageLayoutIn(destDir: File): Option[ClassFile] = packagePath .map { path => - val destClass = File(destDir, path + ".class") + val destClass = destDir / s"${path}.class" if (destClass.exists()) { logger.warn(s"Overwriting class file: ${destClass.path.toAbsolutePath}") } From 728897e7d747fc72a3a935c3aa8ca66d6802670c Mon Sep 17 00:00:00 2001 From: Reuben Steenekamp Date: Tue, 11 Jul 2023 11:49:09 +0200 Subject: [PATCH 7/9] Add javadocs and use copy instead of move Since we recursively search, the class files found may include those in the original source directory, which we do not want to move. So we just copy instead. --- .../io/joern/jimple2cpg/Jimple2Cpg.scala | 30 +++++---- .../jimple2cpg/util/ProgramHandlingUtil.scala | 63 +++++++++++++++---- 2 files changed, 71 insertions(+), 22 deletions(-) diff --git a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/Jimple2Cpg.scala b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/Jimple2Cpg.scala index c60a75e062a2..8a3c3e0ab532 100644 --- a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/Jimple2Cpg.scala +++ b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/Jimple2Cpg.scala @@ -47,15 +47,11 @@ class Jimple2Cpg extends X2CpgFrontend[Config] { Options.v().setPhaseOption("jb", "use-original-names:false") } - def sootLoadClass(inputDir: String): Unit = { - Options.v().set_process_dir(List(inputDir).asJava) - Options.v().set_src_prec(Options.src_prec_class) - } - /** Load all class files from archives or directories recursively - * @return - * The list of extracted class files and the components of their fully qualified class names - */ + * @return + * The list of extracted class files whose package 
path could be + * extracted, placed on that package path relative to [[tmpDir]] + */ private def loadClassFiles(src: File, tmpDir: File): List[ClassFile] = { val archiveFileExtensions = Set(".jar", ".war", ".zip") extractClassesInPackageLayout( @@ -65,7 +61,13 @@ class Jimple2Cpg extends X2CpgFrontend[Config] { isArchive = e => e.extension.exists(archiveFileExtensions.contains) ) } - private def sootLoadRecursively(input: File, tmpDir: File, cpg: Cpg, config: Config): List[ClassFile] = { + + /** + * Extract all class files found, place them in their package layout and load them into soot. + * @param input The file/directory to traverse for class files. + * @param tmpDir The directory to place the class files in their package layout + */ + private def sootLoadRecursively(input: File, tmpDir: File): List[ClassFile] = { Options.v().set_soot_classpath(tmpDir.canonicalPath) Options.v().set_prepend_classpath(true) val classFiles = loadClassFiles(input, tmpDir) @@ -79,6 +81,12 @@ class Jimple2Cpg extends X2CpgFrontend[Config] { classFiles } + /** + * Apply the soot passes + * @param cpg + * @param config + * @param tmpDir A temporary directory that will be used as the classpath for extracted class files + */ private def cpgApplyPasses(cpg: Cpg, config: Config, tmpDir: File): Unit = { val input = File(config.inputPath) configureSoot(config, tmpDir) @@ -93,7 +101,7 @@ class Jimple2Cpg extends X2CpgFrontend[Config] { astCreator.global } case _ => - val classFiles = sootLoadRecursively(input, tmpDir, cpg, config) + val classFiles = sootLoadRecursively(input, tmpDir) { () => val astCreator = AstCreationPass(classFiles, cpg) astCreator.createAndApply() @@ -111,7 +119,7 @@ class Jimple2Cpg extends X2CpgFrontend[Config] { .createAndApply() } - def createCpg(config: Config): Try[Cpg] = + override def createCpg(config: Config): Try[Cpg] = try { withNewEmptyCpg(config.outputPath, config: Config) { (cpg, config) => File.temporaryDirectory("jimple2cpg-").apply { tmpDir => diff --git 
a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala index bdf43ec59df5..852806bf2b20 100644 --- a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala +++ b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala @@ -6,7 +6,7 @@ import org.objectweb.asm.ClassReader.SKIP_CODE import org.objectweb.asm.{ClassReader, ClassVisitor, Opcodes} import org.slf4j.LoggerFactory -import java.io.FileInputStream +import java.io.{FileInputStream, InputStream} import java.nio.file.Path import java.util.zip.ZipEntry import scala.jdk.CollectionConverters.EnumerationHasAsScala @@ -18,6 +18,10 @@ object ProgramHandlingUtil { private val logger = LoggerFactory.getLogger(ProgramHandlingUtil.getClass) + /** Common properties of a File and ZipEntry, used to + * determine whether a file in a directory or an entry + * in an archive is worth emitting/extracting + */ sealed class Entry(entry: Either[File, ZipEntry]) { def this(file: File) = this(Left(file)) @@ -30,24 +34,41 @@ object ProgramHandlingUtil { def maybeRegularFile(): Boolean = entry.fold(_.isRegularFile, !_.isDirectory) } + /** Process files that may lead to more files to process or to emit a resulting value of [[A]] + * + * @param src The file/directory to traverse + * @param emitOrUnpack A function that takes a file and either emits a value or returns more files to traverse + * @tparam A The type of emitted values + * @return The emitted values + */ private def unfoldArchives[A](src: File, emitOrUnpack: File => Either[A, List[File]]): IterableOnce[A] = { + // TODO: add recursion depth limit emitOrUnpack(src) match { case Left(a) => Seq(a) case Right(disposeFiles) => disposeFiles.flatMap(x => unfoldArchives(x, emitOrUnpack)) } } + + /** Find
.class
files, including those inside archives. + * + * @param src The file/directory to search. + * @param tmpDir A temporary directory for extracted archives + * @param isArchive Whether an entry is an archive to extract + * @param isClass Whether an entry is a class file + * @return The list of class files found, which may either be in [[src]] or in an extracted archive under [[tmpDir]] + */ private def extractClassesToTmp( src: File, tmpDir: File, isArchive: Entry => Boolean, - isSource: Entry => Boolean + isClass: Entry => Boolean ): IterableOnce[ClassFile] = { - def shouldExtract(e: Entry) = e.maybeRegularFile() && (isArchive(e) || isSource(e)) + def shouldExtract(e: Entry) = e.maybeRegularFile() && (isArchive(e) || isClass(e)) unfoldArchives( src, { - case f if isSource(Entry(f)) => + case f if isClass(Entry(f)) => Left(ClassFile(f)) case f if f.isDirectory() => val files = f.listRecursively.filterNot(_.isDirectory).toList @@ -68,8 +89,8 @@ object ProgramHandlingUtil { } object ClassFile { - def getPackagePathFromByteCode(fis: FileInputStream): Option[String] = { - val cr = new ClassReader(fis) + def getPackagePathFromByteCode(is: InputStream): Option[String] = { + val cr = new ClassReader(is) sealed class ClassNameVisitor extends ClassVisitor(Opcodes.ASM9) { var path: Option[String] = None override def visit( @@ -88,6 +109,11 @@ object ProgramHandlingUtil { rootVisitor.path } + /** Attempt to retrieve the package path from JVM bytecode. 
+ * + * @param file The class file + * @return The package path if successfully retrieved + */ def getPackagePathFromByteCode(file: File): Option[String] = Try(file.fileInputStream.apply(getPackagePathFromByteCode)) .recover { @@ -101,11 +127,16 @@ object ProgramHandlingUtil { sealed class ClassFile(val file: File, val packagePath: Option[String]) { def this(file: File) = this(file, ClassFile.getPackagePathFromByteCode(file)) - // TODO: Test that the path separator is always unix val components: Option[Array[String]] = packagePath.map(_.split("/")) val fullyQualifiedClassName: Option[String] = components.map(_.mkString(".")) - def moveToPackageLayoutIn(destDir: File): Option[ClassFile] = + + /** Copy the class file to its package path relative to [[destDir]]. + * This will overwrite a class file at the destination if it exists. + * @param destDir The directory in which to place the class file + * @return The class file at the destination if the package path could be retrieved from the its bytecode + */ + def copyToPackageLayoutIn(destDir: File): Option[ClassFile] = packagePath .map { path => val destClass = destDir / s"${path}.class" @@ -113,13 +144,23 @@ object ProgramHandlingUtil { logger.warn(s"Overwriting class file: ${destClass.path.toAbsolutePath}") } destClass.parent.createDirectories(); - ClassFile(file.moveTo(destClass)(File.CopyOptions(overwrite = true)), packagePath) + ClassFile(file.copyTo(destClass)(File.CopyOptions(overwrite = true)), packagePath) } .orElse { - logger.warn(s"Missing package path for ${file.canonicalPath}. Failed to move to ${destDir.canonicalPath}") + logger.warn(s"Missing package path for ${file.canonicalPath}. Failed to copy to ${destDir.canonicalPath}") None } } + + /** Find
.class
files, including those inside archives and copy them to their + * package path location relative to [[destDir]] + * + * @param src The file/directory to search. + * @param destDir The directory in which to place the class files + * @param isArchive Whether an entry is an archive to extract + * @param isClass Whether an entry is a class file + * @return The copied class files in destDir + */ def extractClassesInPackageLayout( src: File, destDir: File, @@ -130,7 +171,7 @@ object ProgramHandlingUtil { .temporaryDirectory("extract-classes-") .apply(tmpDir => extractClassesToTmp(src, tmpDir, isArchive, isClass).iterator - .flatMap(_.moveToPackageLayoutIn(destDir)) + .flatMap(_.copyToPackageLayoutIn(destDir)) .toList ) From e4a7b29746fc77cc9bc2986c474bf8886f9ff25e Mon Sep 17 00:00:00 2001 From: Reuben Steenekamp Date: Tue, 11 Jul 2023 14:49:51 +0200 Subject: [PATCH 8/9] Add tests and prevent slipzip vuln. --- .../io/joern/jimple2cpg/Jimple2Cpg.scala | 31 +++--- .../jimple2cpg/util/ProgramHandlingUtil.scala | 101 +++++++++++------- .../resources/unpacking/NestedHelloWorld.jar | Bin 0 -> 1360 bytes .../resources/unpacking/helloworld/Bar.class | Bin 0 -> 338 bytes .../resources/unpacking/helloworld/Foo.class | Bin 0 -> 330 bytes .../src/test/resources/unpacking/slippy.zip | Bin 0 -> 904 bytes .../jimple2cpg/unpacking/JarUnpacking.scala | 64 ++++++----- 7 files changed, 118 insertions(+), 78 deletions(-) create mode 100644 joern-cli/frontends/jimple2cpg/src/test/resources/unpacking/NestedHelloWorld.jar create mode 100644 joern-cli/frontends/jimple2cpg/src/test/resources/unpacking/helloworld/Bar.class create mode 100644 joern-cli/frontends/jimple2cpg/src/test/resources/unpacking/helloworld/Foo.class create mode 100644 joern-cli/frontends/jimple2cpg/src/test/resources/unpacking/slippy.zip diff --git a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/Jimple2Cpg.scala b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/Jimple2Cpg.scala index 
8a3c3e0ab532..303317ef60f5 100644 --- a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/Jimple2Cpg.scala +++ b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/Jimple2Cpg.scala @@ -48,10 +48,10 @@ class Jimple2Cpg extends X2CpgFrontend[Config] { } /** Load all class files from archives or directories recursively - * @return - * The list of extracted class files whose package path could be - * extracted, placed on that package path relative to [[tmpDir]] - */ + * @return + * The list of extracted class files whose package path could be extracted, placed on that package path relative to + * [[tmpDir]] + */ private def loadClassFiles(src: File, tmpDir: File): List[ClassFile] = { val archiveFileExtensions = Set(".jar", ".war", ".zip") extractClassesInPackageLayout( @@ -62,11 +62,12 @@ class Jimple2Cpg extends X2CpgFrontend[Config] { ) } - /** - * Extract all class files found, place them in their package layout and load them into soot. - * @param input The file/directory to traverse for class files. - * @param tmpDir The directory to place the class files in their package layout - */ + /** Extract all class files found, place them in their package layout and load them into soot. + * @param input + * The file/directory to traverse for class files. 
+ * @param tmpDir + * The directory to place the class files in their package layout + */ private def sootLoadRecursively(input: File, tmpDir: File): List[ClassFile] = { Options.v().set_soot_classpath(tmpDir.canonicalPath) Options.v().set_prepend_classpath(true) @@ -81,12 +82,12 @@ class Jimple2Cpg extends X2CpgFrontend[Config] { classFiles } - /** - * Apply the soot passes - * @param cpg - * @param config - * @param tmpDir A temporary directory that will be used as the classpath for extracted class files - */ + /** Apply the soot passes + * @param cpg + * @param config + * @param tmpDir + * A temporary directory that will be used as the classpath for extracted class files + */ private def cpgApplyPasses(cpg: Cpg, config: Config, tmpDir: File): Unit = { val input = File(config.inputPath) configureSoot(config, tmpDir) diff --git a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala index 852806bf2b20..db3ca2320d32 100644 --- a/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala +++ b/joern-cli/frontends/jimple2cpg/src/main/scala/io/joern/jimple2cpg/util/ProgramHandlingUtil.scala @@ -9,7 +9,7 @@ import org.slf4j.LoggerFactory import java.io.{FileInputStream, InputStream} import java.nio.file.Path import java.util.zip.ZipEntry -import scala.jdk.CollectionConverters.EnumerationHasAsScala +import scala.jdk.CollectionConverters.{CollectionHasAsScala, IterableHasAsScala} import scala.util.{Failure, Left, Success, Try, Using} /** Responsible for handling JAR unpacking and handling the temporary build directory. 
@@ -18,10 +18,9 @@ object ProgramHandlingUtil { private val logger = LoggerFactory.getLogger(ProgramHandlingUtil.getClass) - /** Common properties of a File and ZipEntry, used to - * determine whether a file in a directory or an entry - * in an archive is worth emitting/extracting - */ + /** Common properties of a File and ZipEntry, used to determine whether a file in a directory or an entry in an + * archive is worth emitting/extracting + */ sealed class Entry(entry: Either[File, ZipEntry]) { def this(file: File) = this(Left(file)) @@ -32,15 +31,27 @@ object ProgramHandlingUtil { def extension: Option[String] = file.extension def isDirectory: Boolean = entry.fold(_.isDirectory, _.isDirectory) def maybeRegularFile(): Boolean = entry.fold(_.isRegularFile, !_.isDirectory) + + /** Determines whether a zip entry is potentially malicious. + * @return + * whether the entry is a ZipEntry and uses '..' in its components + */ + // Note that we consider either type of path separator as although the spec says that only + // unix separators are to be used, zip files in the wild may vary. 
+ def isZipSlip: Boolean = entry.fold(_ => false, _.getName.split("[/\\\\]").contains("..")) } /** Process files that may lead to more files to process or to emit a resulting value of [[A]] - * - * @param src The file/directory to traverse - * @param emitOrUnpack A function that takes a file and either emits a value or returns more files to traverse - * @tparam A The type of emitted values - * @return The emitted values - */ + * + * @param src + * The file/directory to traverse + * @param emitOrUnpack + * A function that takes a file and either emits a value or returns more files to traverse + * @tparam A + * The type of emitted values + * @return + * The emitted values + */ private def unfoldArchives[A](src: File, emitOrUnpack: File => Either[A, List[File]]): IterableOnce[A] = { // TODO: add recursion depth limit emitOrUnpack(src) match { @@ -50,13 +61,18 @@ object ProgramHandlingUtil { } /** Find
.class
files, including those inside archives. - * - * @param src The file/directory to search. - * @param tmpDir A temporary directory for extracted archives - * @param isArchive Whether an entry is an archive to extract - * @param isClass Whether an entry is a class file - * @return The list of class files found, which may either be in [[src]] or in an extracted archive under [[tmpDir]] - */ + * + * @param src + * The file/directory to search. + * @param tmpDir + * A temporary directory for extracted archives + * @param isArchive + * Whether an entry is an archive to extract + * @param isClass + * Whether an entry is a class file + * @return + * The list of class files found, which may either be in [[src]] or in an extracted archive under [[tmpDir]] + */ private def extractClassesToTmp( src: File, tmpDir: File, @@ -64,7 +80,7 @@ object ProgramHandlingUtil { isClass: Entry => Boolean ): IterableOnce[ClassFile] = { - def shouldExtract(e: Entry) = e.maybeRegularFile() && (isArchive(e) || isClass(e)) + def shouldExtract(e: Entry) = !e.isZipSlip && e.maybeRegularFile() && (isArchive(e) || isClass(e)) unfoldArchives( src, { @@ -75,7 +91,7 @@ object ProgramHandlingUtil { Right(files) case f if isArchive(Entry(f)) => val xTmp = File.newTemporaryDirectory("extract-archive-", parent = Some(tmpDir)) - val unzipDirs = Try(f.unzipTo(xTmp, e => shouldExtract(Entry(f)))) match { + val unzipDirs = Try(f.unzipTo(xTmp, e => shouldExtract(Entry(e)))) match { case Success(dir) => List(dir) case Failure(e) => logger.warn(s"Failed to extract archive", e) @@ -110,10 +126,12 @@ object ProgramHandlingUtil { } /** Attempt to retrieve the package path from JVM bytecode. 
- * - * @param file The class file - * @return The package path if successfully retrieved - */ + * + * @param file + * The class file + * @return + * The package path if successfully retrieved + */ def getPackagePathFromByteCode(file: File): Option[String] = Try(file.fileInputStream.apply(getPackagePathFromByteCode)) .recover { @@ -131,11 +149,13 @@ object ProgramHandlingUtil { val fullyQualifiedClassName: Option[String] = components.map(_.mkString(".")) - /** Copy the class file to its package path relative to [[destDir]]. - * This will overwrite a class file at the destination if it exists. - * @param destDir The directory in which to place the class file - * @return The class file at the destination if the package path could be retrieved from the its bytecode - */ + /** Copy the class file to its package path relative to [[destDir]]. This will overwrite a class file at the + * destination if it exists. + * @param destDir + * The directory in which to place the class file + * @return + * The class file at the destination if the package path could be retrieved from its bytecode + */ def copyToPackageLayoutIn(destDir: File): Option[ClassFile] = packagePath .map { path => @@ -152,15 +172,20 @@ object ProgramHandlingUtil { } } - /** Find
.class
files, including those inside archives and copy them to their - * package path location relative to [[destDir]] - * - * @param src The file/directory to search. - * @param destDir The directory in which to place the class files - * @param isArchive Whether an entry is an archive to extract - * @param isClass Whether an entry is a class file - * @return The copied class files in destDir - */ + /** Find
.class
files, including those inside archives and copy them to their package path location + * relative to [[destDir]] + * + * @param src + * The file/directory to search. + * @param destDir + * The directory in which to place the class files + * @param isArchive + * Whether an entry is an archive to extract + * @param isClass + * Whether an entry is a class file + * @return + * The copied class files in destDir + */ def extractClassesInPackageLayout( src: File, destDir: File, diff --git a/joern-cli/frontends/jimple2cpg/src/test/resources/unpacking/NestedHelloWorld.jar b/joern-cli/frontends/jimple2cpg/src/test/resources/unpacking/NestedHelloWorld.jar new file mode 100644 index 0000000000000000000000000000000000000000..74d5f7083f68eed6ea16dd94f080ff1e1060585a GIT binary patch literal 1360 zcmWIWW@Zs#U|`^2NJ@SkcFk$`Mq4HZhSkgr4D1Xt3@NDvdRd7@p&^_M%;kD(Q`P}- zX$3a}Bg+e*3YHY0M(z)3X%CYa8js}sOkh(va8lv$gV$1zR=j@se3`t|p_2#Bs4ba( zV%>JLrCXY2H%@%YYRt^+%@vTyaED{-lk15m+%_=uYIyjxy&i~m zm72T!raCYN+cMh_aSbPc*c7e}2w&e904=od-Xz-|Va1aeCdt`!9WS&RkaAo-b$r`Q7>b zcdXdvJ1ulQbNqI^OitOYZef$J_qt!yz+im==%M0 z+NON2`%j9#{dt%8WcP&n5R2Szm)!HS?DAs&l%Cx#^!hQ++xulQGZJF{ygah}-~S%Q zdp$lyXLblpVVRFUIqaD+!6fs^f`@Om-F-3XmT#8gulYL)ls-P-|KKi}F7B7N>GQL*d(4gP z_s8F7P_y8_=wRIN;y|d{+x^ ziCg6TbwibR=vS4?P1?;zQZ_}L@QSX}==v49UwBW;UST=K&vR}X^tl^0+Wm=+;F*`Kr^(C3%N5|w$Rx*% zs}zv{ITQ?*G=eC2nZgPwQ_$iQ*)YsRfoxbEu%JRpB0%G?BoByjxIznJ95A^tENR>V dEYYxs9mr&0*kL%7l?~({W+40qq+M7*JOFfi2j>6) literal 0 HcmV?d00001 diff --git a/joern-cli/frontends/jimple2cpg/src/test/resources/unpacking/helloworld/Bar.class b/joern-cli/frontends/jimple2cpg/src/test/resources/unpacking/helloworld/Bar.class new file mode 100644 index 0000000000000000000000000000000000000000..37dea49844e8c01dc5e01d0e9a543850403dd25a GIT binary patch literal 338 zcmYLDO-sX25S-0t6RWND0EY5&chIJP98B0sbg) zURvA(#)7TN5_kIqJ@90*J%&eJcigM&q<7!nU#Cp{Qe(4@ZbxEEA^tHQdJMpO}Z+wM&MuSN;h*sXFOR3 z=(1QCBXF-wUI+$>u8QmRQ(Dw_DlOR=B_>nlQq|h>zoy$f>SuwMP&=cA%5$=D98Y3_ 
zyfQ4l3Bp^muCrpHtyP!X7xpBaA>gIN}7Z#K^#)#2~|vk(!f}U!Gr-lcKMur;o&QN-WY# z&PgmT4h`XCVCE3iOWdxzHl?(Jn}Lz#1v3K!OA64GSijwVhaCjw^qw!vVd8uk=-@g% z;DK3?64!(YI*|blm*&qEj!DURa)aYvXPZ_Kmt8}@)0^0sr%P_%JbvcgpMSY^3_gWI ziucTZ8IjXVF#pj6X?vtK%0zs{0wGVk{bp+Dv`UClQsb=)pm$nn=j z{MEin-_Wlrm*-d?UUDYIZu(TS(%T|myW<3#mR$9(e7?$)^PNWDal=NtAJTtRTKZFi zQ(yT>r%t~aeoRX5!RF-8Rtk@2CLahr;?eW&snnO;e=9T=p$`X+t)3=9k}LP6@5 zpN|xzY`B8d*Z+`%KrQoaF)gM?#~uCe9cbP=$zjio2_~6O7Cd~j?e2?7w|uh{f6d=f zp!D$p{|9%;baB7DO`o5g-D7TSzd!yygPH~ZMF-=C7YCaCU+6!La%Tb=v% zX$$pPlzn45mVC@?*9+M*$zit_KKk^k^7DD^Ip34+yUVNR?L5$6y!+FYm*@X61b8zt z$uZ+fqY}WNg@7fEAQ~x+;!LrukQ9rST9NI+j5uU_f`L(q(;lQ~1lj|NMhtsc*+4F3 N0>TSGdJz+d2LMs1T(JNE literal 0 HcmV?d00001 diff --git a/joern-cli/frontends/jimple2cpg/src/test/scala/io/joern/jimple2cpg/unpacking/JarUnpacking.scala b/joern-cli/frontends/jimple2cpg/src/test/scala/io/joern/jimple2cpg/unpacking/JarUnpacking.scala index 9c8eb4e325ee..6c364b00776b 100644 --- a/joern-cli/frontends/jimple2cpg/src/test/scala/io/joern/jimple2cpg/unpacking/JarUnpacking.scala +++ b/joern-cli/frontends/jimple2cpg/src/test/scala/io/joern/jimple2cpg/unpacking/JarUnpacking.scala @@ -1,13 +1,14 @@ package io.joern.jimple2cpg.unpacking +import better.files.File import io.joern.jimple2cpg.{Config, Jimple2Cpg} import io.joern.jimple2cpg.util.ProgramHandlingUtil import io.shiftleft.codepropertygraph.Cpg -import io.shiftleft.semanticcpg.language._ +import io.shiftleft.semanticcpg.language.* import io.shiftleft.utils.ProjectRoot import org.scalatest.BeforeAndAfterAll import org.scalatest.matchers.must.Matchers -import org.scalatest.matchers.should.Matchers._ +import org.scalatest.matchers.should.Matchers.* import org.scalatest.wordspec.AnyWordSpec import java.nio.file.{Files, Path, Paths} @@ -15,42 +16,55 @@ import scala.util.{Failure, Success, Try} class JarUnpacking extends AnyWordSpec with Matchers with BeforeAndAfterAll { - var cpg: Cpg = _ + var validCpgs: Map[String, Cpg] = _ + var slippyCpg: Cpg = _ override protected def 
beforeAll(): Unit = { super.beforeAll() - Try(getClass.getResource("/unpacking").toURI) match { + validCpgs = List("HelloWorld.jar", "NestedHelloWorld.jar", "helloworld") + .map(k => (k, getUnpackingCpg(k))) + .toMap + slippyCpg = getUnpackingCpg("slippy.zip") + } + + private def getUnpackingCpg(path: String): Cpg = + Try(getClass.getResource(s"/unpacking/${path}").toURI) match { case Success(x) => implicit val defaultConfig: Config = Config() - cpg = new Jimple2Cpg().createCpg(Paths.get(x).toString).get + new Jimple2Cpg().createCpg(Paths.get(x).toString).get case Failure(x: Throwable) => fail("Unable to obtain test resources.", x) } - } - "'resources/unpacking' should contain 'HelloWorld.jar'" in { + "'resources/unpacking/helloworld' should contain 'HelloWorld.jar'" in { val targetDir = ProjectRoot.relativise("joern-cli/frontends/jimple2cpg/src/test/resources/unpacking") - Files - .walk(Path.of(targetDir)) - .map(_.toFile) - .filter(f => f.isFile && f.getName.contains(".jar")) - .map(_.getName) - .toArray shouldBe Array("HelloWorld.jar") + File(targetDir) + .walk() + .filter(f => f.isRegularFile && f.extension.exists(_ == ".jar")) + .map(_.name) + .toSet shouldBe Set("HelloWorld.jar", "NestedHelloWorld.jar") } "should reflect the correct package order" in { - val List(foo) = cpg.typeDecl.fullNameExact("Foo").l - foo.name shouldBe "Foo" - - val List(bar) = cpg.typeDecl.fullNameExact("pac.Bar").l - bar.name shouldBe "Bar" - - cpg.method.filterNot(_.isExternal).fullName.toSetMutable shouldBe Set( - "Foo.:void()", - "Foo.add:int(int,int)", - "pac.Bar.sub:int(int,int)", - "pac.Bar.:void()" - ) + for ((name, cpg) <- validCpgs) { + val List(foo) = cpg.typeDecl.fullNameExact("Foo").l + foo.name shouldBe "Foo" + + val List(bar) = cpg.typeDecl.fullNameExact("pac.Bar").l + bar.name shouldBe "Bar" + + cpg.method.filterNot(_.isExternal).fullName.toSet shouldBe Set( + "Foo.:void()", + "Foo.add:int(int,int)", + "pac.Bar.sub:int(int,int)", + "pac.Bar.:void()" + ) + } + } + + 
"should not extract zip entries with paths that contain '..' ala zipslip" in { + slippyCpg.typeDecl shouldBe empty + slippyCpg.method.filterNot(_.isExternal) shouldBe empty } } From 3482436ecc8cab46c564eb4755662b2ba6c0455c Mon Sep 17 00:00:00 2001 From: Reuben Steenekamp Date: Tue, 11 Jul 2023 14:51:48 +0200 Subject: [PATCH 9/9] Fix test description. --- .../test/scala/io/joern/jimple2cpg/unpacking/JarUnpacking.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/joern-cli/frontends/jimple2cpg/src/test/scala/io/joern/jimple2cpg/unpacking/JarUnpacking.scala b/joern-cli/frontends/jimple2cpg/src/test/scala/io/joern/jimple2cpg/unpacking/JarUnpacking.scala index 6c364b00776b..561f6e5861c6 100644 --- a/joern-cli/frontends/jimple2cpg/src/test/scala/io/joern/jimple2cpg/unpacking/JarUnpacking.scala +++ b/joern-cli/frontends/jimple2cpg/src/test/scala/io/joern/jimple2cpg/unpacking/JarUnpacking.scala @@ -36,7 +36,7 @@ class JarUnpacking extends AnyWordSpec with Matchers with BeforeAndAfterAll { fail("Unable to obtain test resources.", x) } - "'resources/unpacking/helloworld' should contain 'HelloWorld.jar'" in { + "'resources/unpacking' should contain 'HelloWorld.jar' and 'NestedHelloWorld.jar'" in { val targetDir = ProjectRoot.relativise("joern-cli/frontends/jimple2cpg/src/test/resources/unpacking") File(targetDir) .walk()