From e649defdae1e8f376ee73d0a96588edd00a0836a Mon Sep 17 00:00:00 2001 From: Prabhu Subramanian Date: Tue, 9 Jul 2024 12:52:32 +0100 Subject: [PATCH] Update chen to bring c fullname improvements Signed-off-by: Prabhu Subramanian --- .scalafmt.conf | 3 +- README.md | 4 +- build.sbt | 4 +- codemeta.json | 2 +- src/main/scala/io/appthreat/atom/Atom.scala | 1125 ++++++------- .../atom/dataflows/DataFlowGraph.scala | 126 +- .../atom/dataflows/OssDataFlow.scala | 22 +- .../atom/frontends/clike/C2Atom.scala | 10 +- .../scala/io/appthreat/atom/package.scala | 164 +- .../parsedeps/PythonDependencyParser.scala | 190 +-- .../io/appthreat/atom/parsedeps/package.scala | 120 +- .../appthreat/atom/passes/DataDepsPass.scala | 62 +- .../atom/passes/SafeConcurrentCpgPass.scala | 142 +- .../appthreat/atom/passes/TypeHintPass.scala | 14 +- .../atom/slicing/DataFlowSlicing.scala | 260 +-- .../atom/slicing/ReachableSlicing.scala | 623 +++---- .../appthreat/atom/slicing/UsageSlicing.scala | 1231 +++++++------- .../io/appthreat/atom/slicing/package.scala | 1432 ++++++++--------- wrapper/nodejs/package-lock.json | 4 +- wrapper/nodejs/package.json | 2 +- 20 files changed, 2771 insertions(+), 2769 deletions(-) diff --git a/.scalafmt.conf b/.scalafmt.conf index cccc72a..3abbee7 100644 --- a/.scalafmt.conf +++ b/.scalafmt.conf @@ -1,10 +1,11 @@ -version = 3.8.1 +version = 3.8.2 runner.dialect = scala3 preset = IntelliJ maxColumn = 100 align.preset = true indent.main = 4 +indent.significant = 2 newlines.source = keep rewrite.scala3.convertToNewSyntax = true diff --git a/README.md b/README.md index fd20c0d..314cf1b 100644 --- a/README.md +++ b/README.md @@ -184,10 +184,10 @@ docker run --rm -v /tmp:/tmp -v $HOME:$HOME -v $(pwd):/app:rw -it ghcr.io/appthr ## Languages supported - C/C++ -- H (C/C++ Header files alone) +- H (C/C++ Header and pre-processed .i files alone) - Java (Requires compilation) - Jar -- Android APK (Requires Android SDK. 
Set the environment variable `ANDROID_HOME`) +- Android APK (Requires Android SDK. Set the environment variable `ANDROID_HOME` or use the container image.) - JavaScript - TypeScript - Python diff --git a/build.sbt b/build.sbt index 93818d0..0065d17 100644 --- a/build.sbt +++ b/build.sbt @@ -1,9 +1,9 @@ name := "atom" ThisBuild / organization := "io.appthreat" -ThisBuild / version := "2.0.16" +ThisBuild / version := "2.0.17" ThisBuild / scalaVersion := "3.4.2" -val chenVersion = "2.1.2" +val chenVersion = "2.1.4" lazy val atom = Projects.atom diff --git a/codemeta.json b/codemeta.json index 0ba2de0..b8ffde4 100644 --- a/codemeta.json +++ b/codemeta.json @@ -7,7 +7,7 @@ "downloadUrl": "https://github.com/AppThreat/atom", "issueTracker": "https://github.com/AppThreat/atom/issues", "name": "atom", - "version": "2.0.16", + "version": "2.0.17", "description": "Atom is a novel intermediate representation for next-generation code analysis.", "applicationCategory": "code-analysis", "keywords": [ diff --git a/src/main/scala/io/appthreat/atom/Atom.scala b/src/main/scala/io/appthreat/atom/Atom.scala index 4696651..45be6f3 100644 --- a/src/main/scala/io/appthreat/atom/Atom.scala +++ b/src/main/scala/io/appthreat/atom/Atom.scala @@ -45,590 +45,591 @@ import scala.util.{Failure, Properties, Success, Try} object Atom: - val DEFAULT_ATOM_OUT_FILE: String = - if Properties.isWin || Charset.defaultCharset() != Charset.forName("UTF-8") then "app.atom" - else "app.⚛" - val DEFAULT_SLICE_OUT_FILE = "slices.json" - val DEFAULT_SLICE_DEPTH = 7 - val DEFAULT_MAX_DEFS: Int = 2000 - val FRAMEWORK_INPUT_TAG: String = "framework-input" - val FRAMEWORK_OUTPUT_TAG: String = "framework-output" - val DEFAULT_EXPORT_DIR: String = "atom-exports" - // Possible values: graphml, dot - val DEFAULT_EXPORT_FORMAT: String = "graphml" - // Possible values: no-delombok, default, types-only, run-delombok - private val DEFAULT_DELOMBOK_MODE: String = - sys.env.getOrElse("CHEN_DELOMBOK_MODE", "types-only") - 
private val TYPE_PROPAGATION_ITERATIONS = 1 - private val MAVEN_JAR_PATH: File = File.home / ".m2" / "repository" - private val GRADLE_JAR_PATH: File = File.home / ".gradle" / "caches" / "modules-2" / "files-2.1" - private val SBT_JAR_PATH: File = File.home / ".ivy2" / "cache" - private val JAR_INFERENCE_PATHS: Set[String] = - Set(MAVEN_JAR_PATH.pathAsString, GRADLE_JAR_PATH.pathAsString, SBT_JAR_PATH.pathAsString) - private val ANDROID_JAR_PATH: Option[String] = - Option(System.getenv("ANDROID_HOME")).flatMap { androidHome => - if File(androidHome).isDirectory then - File(androidHome).glob("**/android.jar").map(_.pathAsString).toSeq.headOption - else None - } + val DEFAULT_ATOM_OUT_FILE: String = + if Properties.isWin || Charset.defaultCharset() != Charset.forName("UTF-8") then "app.atom" + else "app.⚛" + val DEFAULT_SLICE_OUT_FILE = "slices.json" + val DEFAULT_SLICE_DEPTH = 7 + val DEFAULT_MAX_DEFS: Int = 2000 + val FRAMEWORK_INPUT_TAG: String = "framework-input" + val FRAMEWORK_OUTPUT_TAG: String = "framework-output" + val DEFAULT_EXPORT_DIR: String = "atom-exports" + // Possible values: graphml, dot + val DEFAULT_EXPORT_FORMAT: String = "graphml" + // Possible values: no-delombok, default, types-only, run-delombok + private val DEFAULT_DELOMBOK_MODE: String = + sys.env.getOrElse("CHEN_DELOMBOK_MODE", "types-only") + private val TYPE_PROPAGATION_ITERATIONS = 1 + private val MAVEN_JAR_PATH: File = File.home / ".m2" / "repository" + private val GRADLE_JAR_PATH: File = File.home / ".gradle" / "caches" / "modules-2" / "files-2.1" + private val SBT_JAR_PATH: File = File.home / ".ivy2" / "cache" + private val JAR_INFERENCE_PATHS: Set[String] = + Set(MAVEN_JAR_PATH.pathAsString, GRADLE_JAR_PATH.pathAsString, SBT_JAR_PATH.pathAsString) + private val ANDROID_JAR_PATH: Option[String] = + Option(System.getenv("ANDROID_HOME")).flatMap { androidHome => + if File(androidHome).isDirectory then + File(androidHome).glob("**/android.jar").map(_.pathAsString).toSeq.headOption + 
else None + } - private val COMMON_IGNORE_REGEX = ".*(test|docs|example|samples|mocks|Documentation|demos).*" + private val COMMON_IGNORE_REGEX = ".*(test|docs|example|samples|mocks|Documentation|demos).*" - private val CHEN_INCLUDE_PATH = sys.env.getOrElse("CHEN_INCLUDE_PATH", "") - // Custom include paths for c/c++ - private val C2ATOM_INCLUDE_PATH = - if CHEN_INCLUDE_PATH.nonEmpty && File( - CHEN_INCLUDE_PATH - ).isDirectory - then CHEN_INCLUDE_PATH.split(java.io.File.pathSeparator).toSet - else - Set.empty + private val CHEN_INCLUDE_PATH = sys.env.getOrElse("CHEN_INCLUDE_PATH", "") + // Custom include paths for c/c++ + private val C2ATOM_INCLUDE_PATH = + if CHEN_INCLUDE_PATH.nonEmpty && File( + CHEN_INCLUDE_PATH + ).isDirectory + then CHEN_INCLUDE_PATH.split(java.io.File.pathSeparator).toSet + else + Set.empty - private val optionParser: OptionParser[BaseConfig] = new scopt.OptionParser[BaseConfig]("atom"): - arg[String]("input") - .optional() - .text("source file or directory") - .action((x, c) => c.withInputPath(File(x))) - .validate { x => - if x == "" then failure("Input path required") - else if !File(x).exists then - failure(s"Input path does not exist at `\"$x\"`, exiting.") - else success - } - opt[String]('o', "output") - .text("output filename. 
Default app.⚛ or app.atom in windows") - .action((x, c) => - c match - case config: AtomConfig => config.withOutputAtomFile(File(x)) - case _ => c - ) - opt[String]('s', "slice-outfile") - .text("export intra-procedural slices as json") - .action((x, c) => c.withOutputSliceFile(File(x))) - opt[String]('l', "language") - .text("source language") - .required() - .action((x, c) => - c match - case config: AtomConfig => config.withLanguage(x) - case _ => c - ) - .validate(x => - if x.isBlank then failure(s"Please specify a language using the --language option.") - else success - ) - opt[Unit]("with-data-deps") - .text("generate the atom with data-dependencies - defaults to `false`") - .action((_, c) => - c match - case config: AtomConfig => config.withDataDependencies(true) - case _ => c - ) - opt[Unit]("remove-atom") - .text("do not persist the atom file - defaults to `false`") - .action((_, c) => - c match - case config: AtomConfig => config.withRemoveAtom(true) - case _ => c - ) - opt[Unit]('x', "export-atom") - .text("export the atom file with data-dependencies to graphml - defaults to `false`") - .action((_, c) => - c match - case config: AtomConfig => - config.withExportAtom(true) - case _ => c - ) - opt[String]("export-dir") - .text(s"export directory. Default: $DEFAULT_EXPORT_DIR") - .action((x, c) => - c match - case config: AtomConfig => config.withExportDir(x) - case _ => c - ) - opt[String]("export-format") - .text(s"export format graphml or dot. Default: $DEFAULT_EXPORT_FORMAT") - .action((x, c) => - c match - case config: AtomConfig => config.withExportFormat(x) - case _ => c - ) - opt[String]("file-filter") - .text(s"the name of the source file to generate slices from. Uses regex.") - .action((x, c) => c.withFileFilter(Option(x))) - opt[String]("method-name-filter") - .text(s"filters in slices that go through specific methods by names. 
Uses regex.") - .action((x, c) => c.withMethodNameFilter(Option(x))) - opt[String]("method-parameter-filter") - .text( - s"filters in slices that go through methods with specific types on the method parameters. Uses regex." - ) - .action((x, c) => c.withMethodParamTypeFilter(Option(x))) - opt[String]("method-annotation-filter") - .text( - s"filters in slices that go through methods with specific annotations on the methods. Uses regex." - ) - .action((x, c) => c.withMethodAnnotationFilter(Option(x))) - opt[Int]("max-num-def") - .text( - s"maximum number of definitions in per-method data flow calculation - defaults to $DEFAULT_MAX_DEFS" - ) - .action((x, c) => - c match - case config: AtomConfig => config.withMaxNumDef(x) - case _ => c - ) - .validate(x => - if x <= 0 then failure("`max-num-def` must be an integer larger than 0") - else success - ) - cmd("parsedeps") - .text("Extract dependencies from the build file and imports") - .action((_, *) => AtomParseDepsConfig().withRemoveAtom(true)) - cmd("data-flow") - .text("Extract backward data-flow slices") - .action((_, _) => AtomDataFlowConfig().withDataDependencies(true)) - .children( - opt[Int]("slice-depth") - .text( - s"the max depth to traverse the DDG for the data-flow slice - defaults to $DEFAULT_SLICE_DEPTH." - ) - .action((x, c) => - c match - case c: AtomDataFlowConfig => c.copy(sliceDepth = x) - case _ => c - ), - opt[String]("sink-filter") - .text(s"filters on the sink's `code` property. 
Uses regex.") - .action((x, c) => - c match - case c: AtomDataFlowConfig => c.copy(sinkPatternFilter = Option(x)) - case _ => c - ) - ) - cmd("usages") - .text("Extract local variable and parameter usages") - .action((_, *) => AtomUsagesConfig().withRemoveAtom(true)) - .children( - opt[Int]("min-num-calls") - .text(s"the minimum number of calls required for a usage slice - defaults to 1.") - .action((x, c) => - c match - case c: AtomUsagesConfig => c.copy(minNumCalls = x) - case _ => c - ), - opt[Unit]("include-source") - .text(s"includes method source code in the slices - defaults to false.") - .action((_, c) => - c match - case c: AtomUsagesConfig => c.copy(includeMethodSource = true) - case _ => c - ), - opt[Unit]("extract-endpoints") - .text( - s"extract http endpoints and convert to openapi format using atom-tools - defaults to false." - ) - .action((_, c) => - c match - case c: AtomUsagesConfig => c.copy(extractEndpoints = true) - case _ => c - ) - ) - cmd("reachables") - .text("Extract reachable data-flow slices based on automated framework tags") - .action((_, *) => AtomReachablesConfig().withDataDependencies(true)) - .children( - opt[String]("source-tag") - .text(s"source tag - defaults to framework-input.") - .action((x, c) => - c match - case c: AtomReachablesConfig => c.copy(sourceTag = x) - case _ => c - ), - opt[String]("sink-tag") - .text(s"sink tag - defaults to framework-output.") - .action((x, c) => - c match - case c: AtomReachablesConfig => c.copy(sinkTag = x) - case _ => c - ), - opt[Int]("slice-depth") - .text( - s"the max depth to traverse the DDG during reverse reachability - defaults to $DEFAULT_SLICE_DEPTH." 
- ) - .action((x, c) => - c match - case c: AtomReachablesConfig => c.copy(sliceDepth = x) - case _ => c - ), - opt[Unit]("include-crypto") - .text(s"includes crypto library flows - defaults to false.") - .action((_, c) => - c match - case c: AtomReachablesConfig => c.copy(includeCryptoFlows = true) - case _ => c - ) - ) - help("help").text("display this help message") + private val optionParser: OptionParser[BaseConfig] = new scopt.OptionParser[BaseConfig]("atom"): + arg[String]("input") + .optional() + .text("source file or directory") + .action((x, c) => c.withInputPath(File(x))) + .validate { x => + if x == "" then failure("Input path required") + else if !File(x).exists then + failure(s"Input path does not exist at `\"$x\"`, exiting.") + else success + } + opt[String]('o', "output") + .text("output filename. Default app.⚛ or app.atom in windows") + .action((x, c) => + c match + case config: AtomConfig => config.withOutputAtomFile(File(x)) + case _ => c + ) + opt[String]('s', "slice-outfile") + .text("export intra-procedural slices as json") + .action((x, c) => c.withOutputSliceFile(File(x))) + opt[String]('l', "language") + .text("source language") + .required() + .action((x, c) => + c match + case config: AtomConfig => config.withLanguage(x) + case _ => c + ) + .validate(x => + if x.isBlank then failure(s"Please specify a language using the --language option.") + else success + ) + opt[Unit]("with-data-deps") + .text("generate the atom with data-dependencies - defaults to `false`") + .action((_, c) => + c match + case config: AtomConfig => config.withDataDependencies(true) + case _ => c + ) + opt[Unit]("remove-atom") + .text("do not persist the atom file - defaults to `false`") + .action((_, c) => + c match + case config: AtomConfig => config.withRemoveAtom(true) + case _ => c + ) + opt[Unit]('x', "export-atom") + .text("export the atom file with data-dependencies to graphml - defaults to `false`") + .action((_, c) => + c match + case config: AtomConfig => + 
config.withExportAtom(true) + case _ => c + ) + opt[String]("export-dir") + .text(s"export directory. Default: $DEFAULT_EXPORT_DIR") + .action((x, c) => + c match + case config: AtomConfig => config.withExportDir(x) + case _ => c + ) + opt[String]("export-format") + .text(s"export format graphml or dot. Default: $DEFAULT_EXPORT_FORMAT") + .action((x, c) => + c match + case config: AtomConfig => config.withExportFormat(x) + case _ => c + ) + opt[String]("file-filter") + .text(s"the name of the source file to generate slices from. Uses regex.") + .action((x, c) => c.withFileFilter(Option(x))) + opt[String]("method-name-filter") + .text(s"filters in slices that go through specific methods by names. Uses regex.") + .action((x, c) => c.withMethodNameFilter(Option(x))) + opt[String]("method-parameter-filter") + .text( + s"filters in slices that go through methods with specific types on the method parameters. Uses regex." + ) + .action((x, c) => c.withMethodParamTypeFilter(Option(x))) + opt[String]("method-annotation-filter") + .text( + s"filters in slices that go through methods with specific annotations on the methods. Uses regex." + ) + .action((x, c) => c.withMethodAnnotationFilter(Option(x))) + opt[Int]("max-num-def") + .text( + s"maximum number of definitions in per-method data flow calculation - defaults to $DEFAULT_MAX_DEFS" + ) + .action((x, c) => + c match + case config: AtomConfig => config.withMaxNumDef(x) + case _ => c + ) + .validate(x => + if x <= 0 then failure("`max-num-def` must be an integer larger than 0") + else success + ) + cmd("parsedeps") + .text("Extract dependencies from the build file and imports") + .action((_, *) => AtomParseDepsConfig().withRemoveAtom(true)) + cmd("data-flow") + .text("Extract backward data-flow slices") + .action((_, _) => AtomDataFlowConfig().withDataDependencies(true)) + .children( + opt[Int]("slice-depth") + .text( + s"the max depth to traverse the DDG for the data-flow slice - defaults to $DEFAULT_SLICE_DEPTH." 
+ ) + .action((x, c) => + c match + case c: AtomDataFlowConfig => c.copy(sliceDepth = x) + case _ => c + ), + opt[String]("sink-filter") + .text(s"filters on the sink's `code` property. Uses regex.") + .action((x, c) => + c match + case c: AtomDataFlowConfig => c.copy(sinkPatternFilter = Option(x)) + case _ => c + ) + ) + cmd("usages") + .text("Extract local variable and parameter usages") + .action((_, *) => AtomUsagesConfig().withRemoveAtom(true)) + .children( + opt[Int]("min-num-calls") + .text(s"the minimum number of calls required for a usage slice - defaults to 1.") + .action((x, c) => + c match + case c: AtomUsagesConfig => c.copy(minNumCalls = x) + case _ => c + ), + opt[Unit]("include-source") + .text(s"includes method source code in the slices - defaults to false.") + .action((_, c) => + c match + case c: AtomUsagesConfig => c.copy(includeMethodSource = true) + case _ => c + ), + opt[Unit]("extract-endpoints") + .text( + s"extract http endpoints and convert to openapi format using atom-tools - defaults to false." + ) + .action((_, c) => + c match + case c: AtomUsagesConfig => c.copy(extractEndpoints = true) + case _ => c + ) + ) + cmd("reachables") + .text("Extract reachable data-flow slices based on automated framework tags") + .action((_, *) => AtomReachablesConfig().withDataDependencies(true)) + .children( + opt[String]("source-tag") + .text(s"source tag - defaults to framework-input.") + .action((x, c) => + c match + case c: AtomReachablesConfig => c.copy(sourceTag = x) + case _ => c + ), + opt[String]("sink-tag") + .text(s"sink tag - defaults to framework-output.") + .action((x, c) => + c match + case c: AtomReachablesConfig => c.copy(sinkTag = x) + case _ => c + ), + opt[Int]("slice-depth") + .text( + s"the max depth to traverse the DDG during reverse reachability - defaults to $DEFAULT_SLICE_DEPTH." 
+ ) + .action((x, c) => + c match + case c: AtomReachablesConfig => c.copy(sliceDepth = x) + case _ => c + ), + opt[Unit]("include-crypto") + .text(s"includes crypto library flows - defaults to false.") + .action((_, c) => + c match + case c: AtomReachablesConfig => c.copy(includeCryptoFlows = true) + case _ => c + ) + ) + help("help").text("display this help message") - def main(args: Array[String]): Unit = - run(args) match - case Right(_) => - case Left(errMsg) => - if errMsg == null then - println("Unexpected error") - else if errMsg.nonEmpty && errMsg.contains( - "storage metadata does not contain version number" - ) - then - println( - "Existing app.atom appears to be corrupted. Please remove and re-run this command." - ) - else - println(s"Failure: $errMsg") - System.exit(1) + def main(args: Array[String]): Unit = + run(args) match + case Right(_) => + case Left(errMsg) => + if errMsg == null then + println("Unexpected error") + else if errMsg.nonEmpty && errMsg.contains( + "storage metadata does not contain version number" + ) + then + println( + "Existing app.atom appears to be corrupted. Please remove and re-run this command." 
+ ) + else + println(s"Failure: $errMsg") + System.exit(1) - private def run(args: Array[String]): Either[String, String] = - val parserArgs = args.toList - parseConfig(parserArgs) match - case Right(config: AtomConfig) => run(config, config.language) - case Right(_) => Left("Invalid configuration generated") - case Left(err) => Left(err) + private def run(args: Array[String]): Either[String, String] = + val parserArgs = args.toList + parseConfig(parserArgs) match + case Right(config: AtomConfig) => run(config, config.language) + case Right(_) => Left("Invalid configuration generated") + case Left(err) => Left(err) - private def run(config: AtomConfig, language: String): Either[String, String] = - for - _ <- generateAtom(config, language) - yield newAtomCreatedString(config) + private def run(config: AtomConfig, language: String): Either[String, String] = + for + _ <- generateAtom(config, language) + yield newAtomCreatedString(config) - private def newAtomCreatedString(config: AtomConfig): String = - val absolutePath = config.outputAtomFile.path.toAbsolutePath - if config.removeAtom then - config.outputAtomFile.delete(true) - "" - else - s"Atom created successfully at $absolutePath\n" + private def newAtomCreatedString(config: AtomConfig): String = + val absolutePath = config.outputAtomFile.path.toAbsolutePath + if config.removeAtom then + config.outputAtomFile.delete(true) + "" + else + s"Atom created successfully at $absolutePath\n" - private def generateSlice(config: AtomConfig, ag: Cpg): Either[String, String] = - def sliceCpg(cpg: Cpg): Option[ProgramSlice] = - config match - case x: AtomDataFlowConfig => - println("Slicing the atom for data-flow. This might take a while ...") - val dataFlowConfig = migrateAtomConfigToSliceConfig(x) - new DataFlowSlicing().calculateDataFlowSlice( - cpg, - dataFlowConfig.asInstanceOf[DataFlowConfig] - ) - case x: AtomUsagesConfig => - println("Slicing the atom for usages. 
This might take a few minutes ...") - new ChennaiTagsPass(cpg).createAndApply() - val usagesConfig = migrateAtomConfigToSliceConfig(x) - Option(UsageSlicing.calculateUsageSlice( - cpg, - usagesConfig.asInstanceOf[UsagesConfig] - )) - case x: AtomReachablesConfig => - println("Slicing the atom for reachables. This might take a few minutes ...") - val reachablesConfig = migrateAtomConfigToSliceConfig(x) - Some(ReachableSlicing.calculateReachableSlice( - cpg, - reachablesConfig.asInstanceOf[ReachablesConfig] - )) - case _ => - None + private def generateSlice(config: AtomConfig, ag: Cpg): Either[String, String] = + def sliceCpg(cpg: Cpg): Option[ProgramSlice] = + config match + case x: AtomDataFlowConfig => + println("Slicing the atom for data-flow. This might take a while ...") + val dataFlowConfig = migrateAtomConfigToSliceConfig(x) + new DataFlowSlicing().calculateDataFlowSlice( + cpg, + dataFlowConfig.asInstanceOf[DataFlowConfig] + ) + case x: AtomUsagesConfig => + println("Slicing the atom for usages. This might take a few minutes ...") + new ChennaiTagsPass(cpg).createAndApply() + val usagesConfig = migrateAtomConfigToSliceConfig(x) + Option(UsageSlicing.calculateUsageSlice( + cpg, + usagesConfig.asInstanceOf[UsagesConfig] + )) + case x: AtomReachablesConfig => + println("Slicing the atom for reachables. 
This might take a few minutes ...") + val reachablesConfig = migrateAtomConfigToSliceConfig(x) + Some(ReachableSlicing.calculateReachableSlice( + cpg, + reachablesConfig.asInstanceOf[ReachablesConfig] + )) + case _ => + None - try - migrateAtomConfigToSliceConfig(config) match - case x: AtomConfig if config.exportAtom => - println(s"Exporting the atom to the directory ${x.exportDir}") - config.exportFormat match - case "graphml" => - ag.method.internal.filterNot(_.name.startsWith("<")).filterNot( - _.name.startsWith("lambda") - ).gml(x.exportDir) - case _ => - // Export all representations - ag.method.internal.filterNot(_.name.startsWith("<")).filterNot( - _.name.startsWith("lambda") - ).dot(x.exportDir) - // Export individual representations - ag.method.internal.filterNot(_.name.startsWith("<")).filterNot( - _.name.startsWith("lambda") - ).exportAllRepr(x.exportDir) - case _: DataFlowConfig => - val dataFlowSlice = sliceCpg(ag).collect { case x: DataFlowSlice => x } - val atomDataFlowSliceJson = - dataFlowSlice.map(x => - AtomDataFlowSlice(x, DataFlowGraph.buildFromSlice(x).paths).toJson - ) - saveSlice(config.outputSliceFile, atomDataFlowSliceJson) - case u: UsagesConfig => - saveSlice(config.outputSliceFile, sliceCpg(ag).map(_.toJson)) - if u.extractEndpoints then - val result = ExternalCommand.run( - s"atom-tools convert -i ${config.outputSliceFile} -t ${config.language} -f openapi3.1.0 -q -o ${config.inputPath.pathAsString}${java.io.File.separator}openapi.generated.json", - "." - ) - result match - case Success(_) => - println("openapi.generated.json created successfully.") - case Failure(exception) => - println( - s"Failed to run atom-tools. Use the atom container image or perform 'pip install atom-tools' and re-run this command. 
Exception: ${exception.getMessage}" - ) - case _: ReachablesConfig => - saveSlice(config.outputSliceFile, sliceCpg(ag).map(_.toJson)) - case x: AtomParseDepsConfig => - parseDependencies(ag).map(_.toJson) match - case Left(err) => return Left(err) - case Right(slice) => saveSlice(x.outputSliceFile, Option(slice)) - case _ => - end match - Right("Atom sliced successfully") - catch - case err: Throwable if err.getMessage == null => - Left(err.getStackTrace.take(7).mkString("\n")) - case err: Throwable => Left(err.getMessage) - end try - end generateSlice + try + migrateAtomConfigToSliceConfig(config) match + case x: AtomConfig if config.exportAtom => + println(s"Exporting the atom to the directory ${x.exportDir}") + config.exportFormat match + case "graphml" => + ag.method.internal.filterNot(_.name.startsWith("<")).filterNot( + _.name.startsWith("lambda") + ).gml(x.exportDir) + case _ => + // Export all representations + ag.method.internal.filterNot(_.name.startsWith("<")).filterNot( + _.name.startsWith("lambda") + ).dot(x.exportDir) + // Export individual representations + ag.method.internal.filterNot(_.name.startsWith("<")).filterNot( + _.name.startsWith("lambda") + ).exportAllRepr(x.exportDir) + case _: DataFlowConfig => + val dataFlowSlice = sliceCpg(ag).collect { case x: DataFlowSlice => x } + val atomDataFlowSliceJson = + dataFlowSlice.map(x => + AtomDataFlowSlice(x, DataFlowGraph.buildFromSlice(x).paths).toJson + ) + saveSlice(config.outputSliceFile, atomDataFlowSliceJson) + case u: UsagesConfig => + saveSlice(config.outputSliceFile, sliceCpg(ag).map(_.toJson)) + if u.extractEndpoints then + val result = ExternalCommand.run( + s"atom-tools convert -i ${config.outputSliceFile} -t ${config.language} -f openapi3.1.0 -q -o ${config + .inputPath.pathAsString}${java.io.File.separator}openapi.generated.json", + "." 
+ ) + result match + case Success(_) => + println("openapi.generated.json created successfully.") + case Failure(exception) => + println( + s"Failed to run atom-tools. Use the atom container image or perform 'pip install atom-tools' and re-run this command. Exception: ${exception.getMessage}" + ) + case _: ReachablesConfig => + saveSlice(config.outputSliceFile, sliceCpg(ag).map(_.toJson)) + case x: AtomParseDepsConfig => + parseDependencies(ag).map(_.toJson) match + case Left(err) => return Left(err) + case Right(slice) => saveSlice(x.outputSliceFile, Option(slice)) + case _ => + end match + Right("Atom sliced successfully") + catch + case err: Throwable if err.getMessage == null => + Left(err.getStackTrace.take(7).mkString("\n")) + case err: Throwable => Left(err.getMessage) + end try + end generateSlice - private def saveSlice(outFile: File, programSlice: Option[String]): Unit = - programSlice.foreach { slice => - val finalOutputPath = - File(outFile.pathAsString) - .createFileIfNotExists() - .write(slice) - .pathAsString - println(s"Slices have been successfully written to $finalOutputPath") - } + private def saveSlice(outFile: File, programSlice: Option[String]): Unit = + programSlice.foreach { slice => + val finalOutputPath = + File(outFile.pathAsString) + .createFileIfNotExists() + .write(slice) + .pathAsString + println(s"Slices have been successfully written to $finalOutputPath") + } - private def migrateAtomConfigToSliceConfig(x: BaseConfig): BaseConfig = - (x match - case config: AtomDataFlowConfig => - DataFlowConfig( - config.sinkPatternFilter, - config.excludeOperatorCalls, - config.mustEndAtExternalMethod, - config.sliceDepth - ) - case config: AtomUsagesConfig => - UsagesConfig( - config.minNumCalls, - config.excludeOperatorCalls, - !config.includeMethodSource, - config.extractEndpoints - ) - case config: AtomReachablesConfig => - ReachablesConfig( - config.sourceTag, - config.sinkTag, - config.sliceDepth, - config.includeCryptoFlows - ) - case _ => 
x - ).withInputPath(x.inputPath) - .withOutputSliceFile(x.outputSliceFile) - .withFileFilter(x.fileFilter) - .withMethodNameFilter(x.methodNameFilter) - .withMethodParamTypeFilter(x.methodParamTypeFilter) - .withMethodAnnotationFilter(x.methodAnnotationFilter) + private def migrateAtomConfigToSliceConfig(x: BaseConfig): BaseConfig = + (x match + case config: AtomDataFlowConfig => + DataFlowConfig( + config.sinkPatternFilter, + config.excludeOperatorCalls, + config.mustEndAtExternalMethod, + config.sliceDepth + ) + case config: AtomUsagesConfig => + UsagesConfig( + config.minNumCalls, + config.excludeOperatorCalls, + !config.includeMethodSource, + config.extractEndpoints + ) + case config: AtomReachablesConfig => + ReachablesConfig( + config.sourceTag, + config.sinkTag, + config.sliceDepth, + config.includeCryptoFlows + ) + case _ => x + ).withInputPath(x.inputPath) + .withOutputSliceFile(x.outputSliceFile) + .withFileFilter(x.fileFilter) + .withMethodNameFilter(x.methodNameFilter) + .withMethodParamTypeFilter(x.methodParamTypeFilter) + .withMethodAnnotationFilter(x.methodAnnotationFilter) - private def loadFromOdb(filename: String): Try[Cpg] = - val odbConfig = overflowdb.Config.withDefaults().withStorageLocation(filename) - val config = CpgLoaderConfig().withOverflowConfig(odbConfig).doNotCreateIndexesOnLoad - try - Success(io.shiftleft.codepropertygraph.cpgloading.CpgLoader.loadFromOverflowDb(config)) - catch - case err: Throwable => - Failure(err) + private def loadFromOdb(filename: String): Try[Cpg] = + val odbConfig = overflowdb.Config.withDefaults().withStorageLocation(filename) + val config = CpgLoaderConfig().withOverflowConfig(odbConfig).doNotCreateIndexesOnLoad + try + Success(io.shiftleft.codepropertygraph.cpgloading.CpgLoader.loadFromOverflowDb(config)) + catch + case err: Throwable => + Failure(err) - private def generateAtom(config: AtomConfig, language: String): Either[String, String] = - generateForLanguage(language.toUpperCase(Locale.ROOT), config) 
+ private def generateAtom(config: AtomConfig, language: String): Either[String, String] = + generateForLanguage(language.toUpperCase(Locale.ROOT), config) - private def generateForLanguage(language: String, config: AtomConfig): Either[String, String] = - val outputAtomFile = config.outputAtomFile.pathAsString - // Create a new atom - def createAtom = - language match - case "H" | "HPP" => - new C2Atom() - .createCpg( - CConfig( - includeComments = false, - logProblems = false, - includePathsAutoDiscovery = false - ) - .withLogPreprocessor(false) - .withInputPath(config.inputPath.pathAsString) - .withOutputPath(outputAtomFile) - .withFunctionBodies(false) - .withIgnoredFilesRegex( - COMMON_IGNORE_REGEX - ) - ) - case Languages.C | Languages.NEWC | "CPP" | "C++" => - new C2Cpg() - .createCpgWithOverlays( - CConfig( - includeComments = false, - logProblems = false, - includePathsAutoDiscovery = true - ) - .withLogPreprocessor(false) - .withInputPath(config.inputPath.pathAsString) - .withOutputPath(outputAtomFile) - .withFunctionBodies(true) - .withIgnoredFilesRegex( - COMMON_IGNORE_REGEX - ) - .withIncludePaths(C2ATOM_INCLUDE_PATH) + private def generateForLanguage(language: String, config: AtomConfig): Either[String, String] = + val outputAtomFile = config.outputAtomFile.pathAsString + // Create a new atom + def createAtom = + language match + case "H" | "HPP" | "I" => + new C2Atom() + .createCpg( + CConfig( + includeComments = false, + logProblems = false, + includePathsAutoDiscovery = false + ) + .withLogPreprocessor(false) + .withInputPath(config.inputPath.pathAsString) + .withOutputPath(outputAtomFile) + .withFunctionBodies(false) + .withIgnoredFilesRegex( + COMMON_IGNORE_REGEX ) - case "JAR" | "JIMPLE" | "ANDROID" | "APK" | "DEX" => - new Jimple2Cpg() - .createCpgWithOverlays( - JimpleConfig(android = ANDROID_JAR_PATH) - .withInputPath(config.inputPath.pathAsString) - .withOutputPath(outputAtomFile) - .withFullResolver(true) + ) + case Languages.C | 
Languages.NEWC | "CPP" | "C++" => + new C2Cpg() + .createCpgWithOverlays( + CConfig( + includeComments = false, + logProblems = false, + includePathsAutoDiscovery = true + ) + .withLogPreprocessor(false) + .withInputPath(config.inputPath.pathAsString) + .withOutputPath(outputAtomFile) + .withFunctionBodies(true) + .withIgnoredFilesRegex( + COMMON_IGNORE_REGEX ) - case Languages.JAVA | Languages.JAVASRC => - new JavaSrc2Cpg() - .createCpgWithOverlays( - JavaConfig( - fetchDependencies = true, - inferenceJarPaths = JAR_INFERENCE_PATHS, - enableTypeRecovery = true, - delombokMode = Some(DEFAULT_DELOMBOK_MODE) + .withIncludePaths(C2ATOM_INCLUDE_PATH) + ) + case "JAR" | "JIMPLE" | "ANDROID" | "APK" | "DEX" => + new Jimple2Cpg() + .createCpgWithOverlays( + JimpleConfig(android = ANDROID_JAR_PATH) + .withInputPath(config.inputPath.pathAsString) + .withOutputPath(outputAtomFile) + .withFullResolver(true) + ) + case Languages.JAVA | Languages.JAVASRC => + new JavaSrc2Cpg() + .createCpgWithOverlays( + JavaConfig( + fetchDependencies = true, + inferenceJarPaths = JAR_INFERENCE_PATHS, + enableTypeRecovery = true, + delombokMode = Some(DEFAULT_DELOMBOK_MODE) + ) + .withInputPath(config.inputPath.pathAsString) + .withDefaultIgnoredFilesRegex( + List( + "\\..*".r, + ".*build/(generated|intermediates|outputs|tmp).*" r, + ".*src/test.*" r ) - .withInputPath(config.inputPath.pathAsString) - .withDefaultIgnoredFilesRegex( - List( - "\\..*".r, - ".*build/(generated|intermediates|outputs|tmp).*" r, - ".*src/test.*" r - ) - ) - .withOutputPath(outputAtomFile) - ) - case Languages.JSSRC | Languages.JAVASCRIPT | "JS" | "TS" | "TYPESCRIPT" => - new JsSrc2Cpg() - .createCpgWithOverlays( - JSConfig() - .withDisableDummyTypes(true) - .withTypePropagationIterations(TYPE_PROPAGATION_ITERATIONS) - .withInputPath(config.inputPath.pathAsString) - .withOutputPath(outputAtomFile) ) - .map { ag => - new JavaScriptInheritanceNamePass(ag).createAndApply() - new ConstClosurePass(ag).createAndApply() - 
new ImportResolverPass(ag).createAndApply() - new JavaScriptTypeRecoveryPass(ag).createAndApply() - new TypeHintPass(ag).createAndApply() - ag - } - case Languages.PYTHONSRC | Languages.PYTHON | "PY" => - new Py2CpgOnFileSystem() - .createCpgWithOverlays( - PyConfig() - .withDisableDummyTypes(true) - .withTypePropagationIterations(TYPE_PROPAGATION_ITERATIONS) - .withInputPath(config.inputPath.pathAsString) - .withOutputPath(outputAtomFile) - .withDefaultIgnoredFilesRegex(List("\\..*".r)) - .withIgnoredFilesRegex( - ".*(samples|test|tests|unittests|docs|virtualenvs|venv|benchmarks|tutorials|noxfile).*" - ) + .withOutputPath(outputAtomFile) + ) + case Languages.JSSRC | Languages.JAVASCRIPT | "JS" | "TS" | "TYPESCRIPT" => + new JsSrc2Cpg() + .createCpgWithOverlays( + JSConfig() + .withDisableDummyTypes(true) + .withTypePropagationIterations(TYPE_PROPAGATION_ITERATIONS) + .withInputPath(config.inputPath.pathAsString) + .withOutputPath(outputAtomFile) + ) + .map { ag => + new JavaScriptInheritanceNamePass(ag).createAndApply() + new ConstClosurePass(ag).createAndApply() + new ImportResolverPass(ag).createAndApply() + new JavaScriptTypeRecoveryPass(ag).createAndApply() + new TypeHintPass(ag).createAndApply() + ag + } + case Languages.PYTHONSRC | Languages.PYTHON | "PY" => + new Py2CpgOnFileSystem() + .createCpgWithOverlays( + PyConfig() + .withDisableDummyTypes(true) + .withTypePropagationIterations(TYPE_PROPAGATION_ITERATIONS) + .withInputPath(config.inputPath.pathAsString) + .withOutputPath(outputAtomFile) + .withDefaultIgnoredFilesRegex(List("\\..*".r)) + .withIgnoredFilesRegex( + ".*(samples|test|tests|unittests|docs|virtualenvs|venv|benchmarks|tutorials|noxfile).*" ) - .map { ag => - new PythonImportsPass(ag).createAndApply() - new PyImportResolverPass(ag).createAndApply() - new DynamicTypeHintFullNamePass(ag).createAndApply() - new PythonInheritanceNamePass(ag).createAndApply() - new PythonTypeRecoveryPass( - ag, - XTypeRecoveryConfig(enabledDummyTypes = false) - ) 
- .createAndApply() - new PythonTypeHintCallLinker(ag).createAndApply() - new AstLinkerPass(ag).createAndApply() - ag - } - case Languages.PHP => - new Php2Atom().createCpgWithOverlays( - PhpConfig() - .withDisableDummyTypes(true) - .withInputPath(config.inputPath.pathAsString) - .withOutputPath(outputAtomFile) - .withDefaultIgnoredFilesRegex(List("\\..*".r)) - .withIgnoredFilesRegex(".*(samples|examples|docs|tests).*") - ).map { ag => - new PhpSetKnownTypesPass(ag).createAndApply() - ag - } - case _ => Failure( - new RuntimeException( - s"No language frontend supported for language '$language'" - ) + ) + .map { ag => + new PythonImportsPass(ag).createAndApply() + new PyImportResolverPass(ag).createAndApply() + new DynamicTypeHintFullNamePass(ag).createAndApply() + new PythonInheritanceNamePass(ag).createAndApply() + new PythonTypeRecoveryPass( + ag, + XTypeRecoveryConfig(enabledDummyTypes = false) ) - // Should we reuse or create the atom - def getOrCreateAtom = - config match - case x: AtomConfig - if (x.isInstanceOf[ - AtomUsagesConfig - ] || config.exportAtom) && config.outputAtomFile.exists() => - config.withRemoveAtom(false) - try - loadFromOdb(outputAtomFile) - catch - case _: Throwable => - println("Removing the existing atom file since it is corrupted.") - config.outputAtomFile.delete(true) - createAtom - case _ => + .createAndApply() + new PythonTypeHintCallLinker(ag).createAndApply() + new AstLinkerPass(ag).createAndApply() + ag + } + case Languages.PHP => + new Php2Atom().createCpgWithOverlays( + PhpConfig() + .withDisableDummyTypes(true) + .withInputPath(config.inputPath.pathAsString) + .withOutputPath(outputAtomFile) + .withDefaultIgnoredFilesRegex(List("\\..*".r)) + .withIgnoredFilesRegex(".*(samples|examples|docs|tests).*") + ).map { ag => + new PhpSetKnownTypesPass(ag).createAndApply() + ag + } + case _ => Failure( + new RuntimeException( + s"No language frontend supported for language '$language'" + ) + ) + // Should we reuse or create the atom + 
def getOrCreateAtom = + config match + case x: AtomConfig + if (x.isInstanceOf[ + AtomUsagesConfig + ] || config.exportAtom) && config.outputAtomFile.exists() => + config.withRemoveAtom(false) + try + loadFromOdb(outputAtomFile) + catch + case _: Throwable => + println("Removing the existing atom file since it is corrupted.") config.outputAtomFile.delete(true) createAtom + case _ => + config.outputAtomFile.delete(true) + createAtom - getOrCreateAtom match - case Failure(exception) => - Left(exception.getMessage) - case Success(ag) => - config match - case x: AtomConfig - if x.dataDeps || x.isInstanceOf[AtomDataFlowConfig] || x.isInstanceOf[ - AtomReachablesConfig - ] => - println("Generating data-flow dependencies from atom. Please wait ...") - // Enhance with simple and easy tags - new EasyTagsPass(ag).createAndApply() - // Enhance with the BOM from cdxgen - new CdxPass(ag).createAndApply() - new ChennaiTagsPass(ag).createAndApply() - new OssDataFlow(new OssDataFlowOptions(maxNumberOfDefinitions = - x.maxNumDef - )) - .run(new LayerCreatorContext(ag)) - case _ => - generateSlice(config, ag) - try - ag.close() - catch - case err: Throwable if err.getMessage == null => - Left(err.getStackTrace.take(7).mkString("\n")) - case err: Throwable => Left(err.getMessage) - Right("Atom generation successful") - end match - end generateForLanguage + getOrCreateAtom match + case Failure(exception) => + Left(exception.getMessage) + case Success(ag) => + config match + case x: AtomConfig + if x.dataDeps || x.isInstanceOf[AtomDataFlowConfig] || x.isInstanceOf[ + AtomReachablesConfig + ] => + println("Generating data-flow dependencies from atom. 
Please wait ...") + // Enhance with simple and easy tags + new EasyTagsPass(ag).createAndApply() + // Enhance with the BOM from cdxgen + new CdxPass(ag).createAndApply() + new ChennaiTagsPass(ag).createAndApply() + new OssDataFlow(new OssDataFlowOptions(maxNumberOfDefinitions = + x.maxNumDef + )) + .run(new LayerCreatorContext(ag)) + case _ => + generateSlice(config, ag) + try + ag.close() + catch + case err: Throwable if err.getMessage == null => + Left(err.getStackTrace.take(7).mkString("\n")) + case err: Throwable => Left(err.getMessage) + Right("Atom generation successful") + end match + end generateForLanguage - private def parseConfig(parserArgs: List[String]): Either[String, BaseConfig] = - optionParser.parse( - parserArgs, - DefaultAtomConfig() - .withOutputAtomFile(File(DEFAULT_ATOM_OUT_FILE)) - .withOutputSliceFile(File(DEFAULT_SLICE_OUT_FILE)) - ) match - case Some(config) => - Right(config) - case None => - Left("Could not parse command line options") + private def parseConfig(parserArgs: List[String]): Either[String, BaseConfig] = + optionParser.parse( + parserArgs, + DefaultAtomConfig() + .withOutputAtomFile(File(DEFAULT_ATOM_OUT_FILE)) + .withOutputSliceFile(File(DEFAULT_SLICE_OUT_FILE)) + ) match + case Some(config) => + Right(config) + case None => + Left("Could not parse command line options") end Atom diff --git a/src/main/scala/io/appthreat/atom/dataflows/DataFlowGraph.scala b/src/main/scala/io/appthreat/atom/dataflows/DataFlowGraph.scala index fd4f3e3..1eec6e8 100644 --- a/src/main/scala/io/appthreat/atom/dataflows/DataFlowGraph.scala +++ b/src/main/scala/io/appthreat/atom/dataflows/DataFlowGraph.scala @@ -8,48 +8,48 @@ import scala.collection.mutable private class DataFlowGraph(nodes: Set[Option[DFNode]]): - private type Path = List[Long] - // Maximum number of data-flow paths to compute - private val MAX_PATHS = 100 + private type Path = List[Long] + // Maximum number of data-flow paths to compute + private val MAX_PATHS = 100 - def paths: 
Set[Path] = - implicit val finalSet: mutable.Set[Path] = mutable.Set.empty - implicit val nMap: Map[Long, DFNode] = nodes.map(x => x.get.id -> x.get).toMap - nodes.foreach { n => - val currPath = List(n.get.id) - follow(currPath, n.get.out.flatMap(nMap.get)) - } - finalSet.toSet + def paths: Set[Path] = + implicit val finalSet: mutable.Set[Path] = mutable.Set.empty + implicit val nMap: Map[Long, DFNode] = nodes.map(x => x.get.id -> x.get).toMap + nodes.foreach { n => + val currPath = List(n.get.id) + follow(currPath, n.get.out.flatMap(nMap.get)) + } + finalSet.toSet - private def isSubList[A](short: List[A], long: List[A]): Boolean = - val sLong = long.to(LazyList) - val sShort = short.to(LazyList) - sLong.tails.exists(_.startsWith(sShort)) + private def isSubList[A](short: List[A], long: List[A]): Boolean = + val sLong = long.to(LazyList) + val sShort = short.to(LazyList) + sLong.tails.exists(_.startsWith(sShort)) - private def isSubList[A](lst: List[A])(implicit finalSet: mutable.Set[Path]): Boolean = - finalSet.filterNot(_.size < lst.size).exists(xs => isSubList(lst, xs)) + private def isSubList[A](lst: List[A])(implicit finalSet: mutable.Set[Path]): Boolean = + finalSet.filterNot(_.size < lst.size).exists(xs => isSubList(lst, xs)) - /** Is there an existing path that starts and ends with the same node - */ - private def isDuplicate(finalSet: mutable.Set[Path], path: Path): Boolean = - finalSet.exists(apath => - apath.headOption == path.headOption && apath.lastOption == path.lastOption - ) + /** Is there an existing path that starts and ends with the same node + */ + private def isDuplicate(finalSet: mutable.Set[Path], path: Path): Boolean = + finalSet.exists(apath => + apath.headOption == path.headOption && apath.lastOption == path.lastOption + ) - private def follow(currPath: List[Long], outNodes: Set[DFNode])( - implicit - nMap: Map[Long, DFNode], - finalSet: mutable.Set[Path] - ): Unit = - outNodes.foreach { x => - val path = currPath :+ x.id - val queue = 
x.out.filterNot(currPath.contains) - if queue.isEmpty then - if !isDuplicate(finalSet, path) && !isSubList(path) then - finalSet.add(path) - else if finalSet.size < MAX_PATHS then - follow(path, queue.flatMap(nMap.get)) - } + private def follow(currPath: List[Long], outNodes: Set[DFNode])( + implicit + nMap: Map[Long, DFNode], + finalSet: mutable.Set[Path] + ): Unit = + outNodes.foreach { x => + val path = currPath :+ x.id + val queue = x.out.filterNot(currPath.contains) + if queue.isEmpty then + if !isDuplicate(finalSet, path) && !isSubList(path) then + finalSet.add(path) + else if finalSet.size < MAX_PATHS then + follow(path, queue.flatMap(nMap.get)) + } end DataFlowGraph private final case class DFNode( @@ -62,33 +62,33 @@ private final case class DFNode( object DataFlowGraph: - private def DF_EDGES = - Set(EdgeTypes.REACHING_DEF, EdgeTypes.CALL, EdgeTypes.REF) - val exec: ExecutorService = - Executors.newVirtualThreadPerTaskExecutor() + private def DF_EDGES = + Set(EdgeTypes.REACHING_DEF, EdgeTypes.CALL, EdgeTypes.REF) + val exec: ExecutorService = + Executors.newVirtualThreadPerTaskExecutor() - def buildFromSlice(slice: DataFlowSlice): DataFlowGraph = - val dfNodes = slice.nodes - .flatMap { - case n if n.fullName.startsWith(" None - case n => Some(n) - } - .map(n => exec.submit(new DFNodeTask(slice, n))) - .map(TimedGet) - .filter(_.isDefined) - new DataFlowGraph(dfNodes) + def buildFromSlice(slice: DataFlowSlice): DataFlowGraph = + val dfNodes = slice.nodes + .flatMap { + case n if n.fullName.startsWith(" None + case n => Some(n) + } + .map(n => exec.submit(new DFNodeTask(slice, n))) + .map(TimedGet) + .filter(_.isDefined) + new DataFlowGraph(dfNodes) - private def TimedGet(dfn: Future[DFNode]) = - try - Option(dfn.get(5, TimeUnit.SECONDS)) - catch - case _: Throwable => None + private def TimedGet(dfn: Future[DFNode]) = + try + Option(dfn.get(5, TimeUnit.SECONDS)) + catch + case _: Throwable => None - private class DFNodeTask(slice: DataFlowSlice, n: 
SliceNode) extends Callable[DFNode]: - override def call(): DFNode = - val inEs = - slice.edges.filter(e => DF_EDGES.contains(e.label) && e.dst == n.id).map(_.src) - val outEs = - slice.edges.filter(e => DF_EDGES.contains(e.label) && e.src == n.id).map(_.dst) - DFNode(n.id, n.isExternal, n.label, inEs, outEs) + private class DFNodeTask(slice: DataFlowSlice, n: SliceNode) extends Callable[DFNode]: + override def call(): DFNode = + val inEs = + slice.edges.filter(e => DF_EDGES.contains(e.label) && e.dst == n.id).map(_.src) + val outEs = + slice.edges.filter(e => DF_EDGES.contains(e.label) && e.src == n.id).map(_.dst) + DFNode(n.id, n.isExternal, n.label, inEs, outEs) end DataFlowGraph diff --git a/src/main/scala/io/appthreat/atom/dataflows/OssDataFlow.scala b/src/main/scala/io/appthreat/atom/dataflows/OssDataFlow.scala index 469390f..e88288e 100644 --- a/src/main/scala/io/appthreat/atom/dataflows/OssDataFlow.scala +++ b/src/main/scala/io/appthreat/atom/dataflows/OssDataFlow.scala @@ -6,10 +6,10 @@ import io.appthreat.dataflowengineoss.semanticsloader.{FlowSemantic, Semantics} import io.shiftleft.semanticcpg.layers.{LayerCreator, LayerCreatorContext, LayerCreatorOptions} object OssDataFlow: - val overlayName: String = "dataflowOss" - val description: String = "Layer to support the atom data flow tracker" + val overlayName: String = "dataflowOss" + val description: String = "Layer to support the atom data flow tracker" - def defaultOpts = new OssDataFlowOptions() + def defaultOpts = new OssDataFlowOptions() class OssDataFlowOptions( var maxNumberOfDefinitions: Int = 2000, @@ -20,12 +20,12 @@ class OssDataFlow(opts: OssDataFlowOptions)(implicit s: Semantics = Semantics.fromList(DefaultSemantics().elements ++ opts.extraFlows) ) extends LayerCreator: - override val overlayName: String = OssDataFlow.overlayName - override val description: String = OssDataFlow.description + override val overlayName: String = OssDataFlow.overlayName + override val description: String = 
OssDataFlow.description - override def create(context: LayerCreatorContext, storeUndoInfo: Boolean): Unit = - val cpg = context.cpg - val enhancementExecList = Iterator(new DataDepsPass(cpg, opts.maxNumberOfDefinitions)) - enhancementExecList.zipWithIndex.foreach { case (pass, index) => - runPass(pass, context, storeUndoInfo, index) - } + override def create(context: LayerCreatorContext, storeUndoInfo: Boolean): Unit = + val cpg = context.cpg + val enhancementExecList = Iterator(new DataDepsPass(cpg, opts.maxNumberOfDefinitions)) + enhancementExecList.zipWithIndex.foreach { case (pass, index) => + runPass(pass, context, storeUndoInfo, index) + } diff --git a/src/main/scala/io/appthreat/atom/frontends/clike/C2Atom.scala b/src/main/scala/io/appthreat/atom/frontends/clike/C2Atom.scala index 38adf00..c5945c2 100644 --- a/src/main/scala/io/appthreat/atom/frontends/clike/C2Atom.scala +++ b/src/main/scala/io/appthreat/atom/frontends/clike/C2Atom.scala @@ -12,8 +12,8 @@ import scala.util.Try class C2Atom extends X2CpgFrontend[Config]: - def createCpg(config: Config): Try[Cpg] = - withNewEmptyCpg(config.outputPath, config) { (cpg, config) => - new MetaDataPass(cpg, Languages.NEWC, config.inputPath).createAndApply() - new AstCreationPass(cpg, config).createAndApply() - } + def createCpg(config: Config): Try[Cpg] = + withNewEmptyCpg(config.outputPath, config) { (cpg, config) => + new MetaDataPass(cpg, Languages.NEWC, config.inputPath).createAndApply() + new AstCreationPass(cpg, config).createAndApply() + } diff --git a/src/main/scala/io/appthreat/atom/package.scala b/src/main/scala/io/appthreat/atom/package.scala index 39994d7..2558ef7 100644 --- a/src/main/scala/io/appthreat/atom/package.scala +++ b/src/main/scala/io/appthreat/atom/package.scala @@ -7,86 +7,86 @@ import io.circe.{Encoder, Json} package object atom: - trait AtomConfig extends BaseConfig: - this.inputPath = File(".") - this.outputSliceFile = File(DEFAULT_SLICE_OUT_FILE) - var outputAtomFile: File = 
File(DEFAULT_ATOM_OUT_FILE) - var language: String = "" - var dataDeps: Boolean = false - var removeAtom: Boolean = false - var maxNumDef: Int = DEFAULT_MAX_DEFS - var exportAtom: Boolean = false - var exportDir: String = DEFAULT_EXPORT_DIR - var exportFormat: String = DEFAULT_EXPORT_FORMAT - - def withOutputAtomFile(x: File): AtomConfig = - this.outputAtomFile = x - this - - def withLanguage(x: String): AtomConfig = - this.language = x - this - - def withDataDependencies(x: Boolean): AtomConfig = - this.dataDeps = x - this - - def withRemoveAtom(x: Boolean): AtomConfig = - this.removeAtom = x - this - - def withExportAtom(x: Boolean): AtomConfig = - this.exportAtom = x - this - - def withExportDir(x: String): AtomConfig = - this.exportDir = x - this - - def withExportFormat(x: String): AtomConfig = - this.exportFormat = x - this - - def withMaxNumDef(x: Int): AtomConfig = - this.maxNumDef = x - this - end AtomConfig - - case class DefaultAtomConfig() extends AtomConfig - - case class AtomParseDepsConfig() extends AtomConfig - - case class AtomDataFlowConfig( - sinkPatternFilter: Option[String] = None, - excludeOperatorCalls: Boolean = true, - mustEndAtExternalMethod: Boolean = true, - sliceDepth: Int = DEFAULT_SLICE_DEPTH - ) extends AtomConfig - - case class AtomUsagesConfig( - minNumCalls: Int = 1, - excludeOperatorCalls: Boolean = true, - includeMethodSource: Boolean = false, - extractEndpoints: Boolean = false - ) extends AtomConfig - - case class AtomReachablesConfig( - sourceTag: String = FRAMEWORK_INPUT_TAG, - sinkTag: String = FRAMEWORK_OUTPUT_TAG, - sliceDepth: Int = DEFAULT_SLICE_DEPTH, - includeCryptoFlows: Boolean = false - ) extends AtomConfig - - import io.appthreat.atom.slicing.* - import io.circe.generic.auto.* - import io.circe.syntax.EncoderOps - - implicit val encodeDataFlowSlice: Encoder[AtomDataFlowSlice] = Encoder.instance { - case AtomDataFlowSlice(dataFlowSlice, paths) => - Json.obj("graph" -> dataFlowSlice.asJson, "paths" -> paths.asJson) 
- } - - case class AtomDataFlowSlice(graph: DataFlowSlice, paths: Set[List[Long]] = Set.empty): - - def toJson: String = this.asJson.noSpaces + trait AtomConfig extends BaseConfig: + this.inputPath = File(".") + this.outputSliceFile = File(DEFAULT_SLICE_OUT_FILE) + var outputAtomFile: File = File(DEFAULT_ATOM_OUT_FILE) + var language: String = "" + var dataDeps: Boolean = false + var removeAtom: Boolean = false + var maxNumDef: Int = DEFAULT_MAX_DEFS + var exportAtom: Boolean = false + var exportDir: String = DEFAULT_EXPORT_DIR + var exportFormat: String = DEFAULT_EXPORT_FORMAT + + def withOutputAtomFile(x: File): AtomConfig = + this.outputAtomFile = x + this + + def withLanguage(x: String): AtomConfig = + this.language = x + this + + def withDataDependencies(x: Boolean): AtomConfig = + this.dataDeps = x + this + + def withRemoveAtom(x: Boolean): AtomConfig = + this.removeAtom = x + this + + def withExportAtom(x: Boolean): AtomConfig = + this.exportAtom = x + this + + def withExportDir(x: String): AtomConfig = + this.exportDir = x + this + + def withExportFormat(x: String): AtomConfig = + this.exportFormat = x + this + + def withMaxNumDef(x: Int): AtomConfig = + this.maxNumDef = x + this + end AtomConfig + + case class DefaultAtomConfig() extends AtomConfig + + case class AtomParseDepsConfig() extends AtomConfig + + case class AtomDataFlowConfig( + sinkPatternFilter: Option[String] = None, + excludeOperatorCalls: Boolean = true, + mustEndAtExternalMethod: Boolean = true, + sliceDepth: Int = DEFAULT_SLICE_DEPTH + ) extends AtomConfig + + case class AtomUsagesConfig( + minNumCalls: Int = 1, + excludeOperatorCalls: Boolean = true, + includeMethodSource: Boolean = false, + extractEndpoints: Boolean = false + ) extends AtomConfig + + case class AtomReachablesConfig( + sourceTag: String = FRAMEWORK_INPUT_TAG, + sinkTag: String = FRAMEWORK_OUTPUT_TAG, + sliceDepth: Int = DEFAULT_SLICE_DEPTH, + includeCryptoFlows: Boolean = false + ) extends AtomConfig + + import 
io.appthreat.atom.slicing.* + import io.circe.generic.auto.* + import io.circe.syntax.EncoderOps + + implicit val encodeDataFlowSlice: Encoder[AtomDataFlowSlice] = Encoder.instance { + case AtomDataFlowSlice(dataFlowSlice, paths) => + Json.obj("graph" -> dataFlowSlice.asJson, "paths" -> paths.asJson) + } + + case class AtomDataFlowSlice(graph: DataFlowSlice, paths: Set[List[Long]] = Set.empty): + + def toJson: String = this.asJson.noSpaces end atom diff --git a/src/main/scala/io/appthreat/atom/parsedeps/PythonDependencyParser.scala b/src/main/scala/io/appthreat/atom/parsedeps/PythonDependencyParser.scala index 21a7112..751f207 100644 --- a/src/main/scala/io/appthreat/atom/parsedeps/PythonDependencyParser.scala +++ b/src/main/scala/io/appthreat/atom/parsedeps/PythonDependencyParser.scala @@ -13,114 +13,114 @@ import java.io.File as JFile object PythonDependencyParser extends XDependencyParser: - implicit val engineContext: EngineContext = EngineContext() - private val SETUP_PY_FILE = ".*setup.py" - private val SETUP_REQUIRES_PATTERN = "(install_requires|extras_require|tests_require)" + implicit val engineContext: EngineContext = EngineContext() + private val SETUP_PY_FILE = ".*setup.py" + private val SETUP_REQUIRES_PATTERN = "(install_requires|extras_require|tests_require)" - override def parse(cpg: Cpg): DependencySlice = DependencySlice( - (parseSetupPy(cpg) ++ parseImports(cpg)) - .groupBy(_.name) - .map { case (_, slices) => slices.reduce((a, b) => a.merge(b)) } - .toSeq - .sortBy(_.name) - ) + override def parse(cpg: Cpg): DependencySlice = DependencySlice( + (parseSetupPy(cpg) ++ parseImports(cpg)) + .groupBy(_.name) + .map { case (_, slices) => slices.reduce((a, b) => a.merge(b)) } + .toSeq + .sortBy(_.name) + ) - private def parseSetupPy(cpg: Cpg): Set[ModuleWithVersion] = - val dataFlowEnabled = cpg.metaData.overlays.contains(OssDataFlow.overlayName) - val requirementsPattern = """([\[\]/.\w_-]+)\s?((=>|<=|==|>=|=<|<|>|!=|~=).*)""".r + private def 
parseSetupPy(cpg: Cpg): Set[ModuleWithVersion] = + val dataFlowEnabled = cpg.metaData.overlays.contains(OssDataFlow.overlayName) + val requirementsPattern = """([\[\]/.\w_-]+)\s?((=>|<=|==|>=|=<|<|>|!=|~=).*)""".r - def dataSourcesToRequires = (cpg.literal ++ cpg.identifier) - .where(_.file.name(SETUP_PY_FILE)) - .where(_.argumentName(SETUP_REQUIRES_PATTERN)) - .collectAll[CfgNode] ++ cpg.assignment.where(_.file.name(SETUP_PY_FILE)).where( - _.source.isCall.name(".listLiteral") - ).target.isIdentifier.code(".*(libs|requirements)").collectAll[CfgNode] + def dataSourcesToRequires = (cpg.literal ++ cpg.identifier) + .where(_.file.name(SETUP_PY_FILE)) + .where(_.argumentName(SETUP_REQUIRES_PATTERN)) + .collectAll[CfgNode] ++ cpg.assignment.where(_.file.name(SETUP_PY_FILE)).where( + _.source.isCall.name(".listLiteral") + ).target.isIdentifier.code(".*(libs|requirements)").collectAll[CfgNode] - def installOrExtraRequires = - cpg.call.where(_.file.name(SETUP_PY_FILE)).where(_.argumentName( - SETUP_REQUIRES_PATTERN - )).argument.collectAll[Literal] + def installOrExtraRequires = + cpg.call.where(_.file.name(SETUP_PY_FILE)).where(_.argumentName( + SETUP_REQUIRES_PATTERN + )).argument.collectAll[Literal] - def setupCall = cpg.call("setup").where(_.file.name(SETUP_PY_FILE)) + def setupCall = cpg.call("setup").where(_.file.name(SETUP_PY_FILE)) - def findOriginalDeclaration(xs: Traversal[CfgNode]): Iterable[Literal] = - xs.flatMap { - case l: Literal => - Iterable(l) - case i: Identifier => - findOriginalDeclaration( - cpg.assignment.where(_.and( - _.file.name(SETUP_PY_FILE), - _.target.isIdentifier.nameExact(i.name) - )).source - ) - case c: Call => - findOriginalDeclaration(c.argument) - case _ => Iterable() - }.collectAll[Literal] - .to(Iterable) + def findOriginalDeclaration(xs: Traversal[CfgNode]): Iterable[Literal] = + xs.flatMap { + case l: Literal => + Iterable(l) + case i: Identifier => + findOriginalDeclaration( + cpg.assignment.where(_.and( + 
_.file.name(SETUP_PY_FILE), + _.target.isIdentifier.nameExact(i.name) + )).source + ) + case c: Call => + findOriginalDeclaration(c.argument) + case _ => Iterable() + }.collectAll[Literal] + .to(Iterable) - val initialTraversal = if dataFlowEnabled then setupCall.reachableBy(dataSourcesToRequires) - else (dataSourcesToRequires ++ installOrExtraRequires) - findOriginalDeclaration(initialTraversal) - .map(x => X2Cpg.stripQuotes(x.code)) - .map { + val initialTraversal = if dataFlowEnabled then setupCall.reachableBy(dataSourcesToRequires) + else (dataSourcesToRequires ++ installOrExtraRequires) + findOriginalDeclaration(initialTraversal) + .map(x => X2Cpg.stripQuotes(x.code)) + .map { - case requirementsPattern(name, versionSpecifiers, _) - if versionSpecifiers.contains("==") => - val versions = versionSpecifiers.split(',').toSeq - val exactVersion = versions.find(_.startsWith("==")).get - ModuleWithVersion( - name, - exactVersion.stripPrefix("=="), - (versions.diff(Seq(exactVersion))).mkString(",") - ) - case requirementsPattern(name, versionSpecifiers, _) => - ModuleWithVersion(name, versionSpecifiers = versionSpecifiers) - case requirementsPattern(name, version) => ModuleWithVersion(name, version) - case x => ModuleWithVersion(x) - } - .toSet - end parseSetupPy + case requirementsPattern(name, versionSpecifiers, _) + if versionSpecifiers.contains("==") => + val versions = versionSpecifiers.split(',').toSeq + val exactVersion = versions.find(_.startsWith("==")).get + ModuleWithVersion( + name, + exactVersion.stripPrefix("=="), + (versions.diff(Seq(exactVersion))).mkString(",") + ) + case requirementsPattern(name, versionSpecifiers, _) => + ModuleWithVersion(name, versionSpecifiers = versionSpecifiers) + case requirementsPattern(name, version) => ModuleWithVersion(name, version) + case x => ModuleWithVersion(x) + } + .toSet + end parseSetupPy - private def parseImports(cpg: Cpg): Set[ModuleWithVersion] = - val root = 
ScalaFile(cpg.metaData.root.headOption.getOrElse(JFile.separator)).pathAsString - // Get a set of local modules to exclude from imports - // Identify the local modules names based on the presence of __init__.py files - // Lastly, exclude import names that also match a local filename - val localModuleNames = cpg.file.name + private def parseImports(cpg: Cpg): Set[ModuleWithVersion] = + val root = ScalaFile(cpg.metaData.root.headOption.getOrElse(JFile.separator)).pathAsString + // Get a set of local modules to exclude from imports + // Identify the local modules names based on the presence of __init__.py files + // Lastly, exclude import names that also match a local filename + val localModuleNames = cpg.file.name + .filterNot(_ == "N/A") + .map(x => ScalaFile(x)) + .map(_.pathAsString) + .map(_.stripPrefix(s"$root${JFile.separatorChar}").split( + JFile.separatorChar + ).head.replaceFirst("\\.py", "")) + .toSet + ++ cpg.file.name(".*__init__.py").name.map(_.stripPrefix( + s"$root${JFile.separatorChar}" + ).stripSuffix(s"${JFile.separatorChar}__init__.py").split( + JFile.separatorChar + ).last).toSet + ++ cpg.file.nameNot(".*__init__.py").name .filterNot(_ == "N/A") .map(x => ScalaFile(x)) .map(_.pathAsString) .map(_.stripPrefix(s"$root${JFile.separatorChar}").split( JFile.separatorChar - ).head.replaceFirst("\\.py", "")) - .toSet - ++ cpg.file.name(".*__init__.py").name.map(_.stripPrefix( - s"$root${JFile.separatorChar}" - ).stripSuffix(s"${JFile.separatorChar}__init__.py").split( - JFile.separatorChar - ).last).toSet - ++ cpg.file.nameNot(".*__init__.py").name - .filterNot(_ == "N/A") - .map(x => ScalaFile(x)) - .map(_.pathAsString) - .map(_.stripPrefix(s"$root${JFile.separatorChar}").split( - JFile.separatorChar - ).last.replaceFirst("\\.py", "")) - .toSet - cpg.imports - .whereNot(_.call.file.name(SETUP_PY_FILE)) - .filterNot { - _.importedEntity match - case Some(x) if x.startsWith(".") => true - case Some(x) if x.contains('.') => 
localModuleNames.contains(x.split('.').head) - case Some(x) => localModuleNames.contains(x) - case _ => true - } - .dedup - .importedEntity - .map(x => ModuleWithVersion(name = x.split('.').head, importedSymbols = x)) + ).last.replaceFirst("\\.py", "")) .toSet - end parseImports + cpg.imports + .whereNot(_.call.file.name(SETUP_PY_FILE)) + .filterNot { + _.importedEntity match + case Some(x) if x.startsWith(".") => true + case Some(x) if x.contains('.') => localModuleNames.contains(x.split('.').head) + case Some(x) => localModuleNames.contains(x) + case _ => true + } + .dedup + .importedEntity + .map(x => ModuleWithVersion(name = x.split('.').head, importedSymbols = x)) + .toSet + end parseImports end PythonDependencyParser diff --git a/src/main/scala/io/appthreat/atom/parsedeps/package.scala b/src/main/scala/io/appthreat/atom/parsedeps/package.scala index e5078d4..df23646 100644 --- a/src/main/scala/io/appthreat/atom/parsedeps/package.scala +++ b/src/main/scala/io/appthreat/atom/parsedeps/package.scala @@ -11,72 +11,72 @@ import io.shiftleft.semanticcpg.language.{ package object parsedeps: - def parseDependencies(cpg: Cpg): Either[String, DependencySlice] = - cpg.metaData.language.map(_.toUpperCase).headOption match - case Some(language) - if Set(Languages.PYTHONSRC, Languages.PYTHON, "PY").contains(language) => - Right(PythonDependencyParser.parse(cpg)) - case Some(language) => - Left(s"'$language' is not yet supported for the `parsedeps` command") - case _ => Left("Unable to extract atom language") + def parseDependencies(cpg: Cpg): Either[String, DependencySlice] = + cpg.metaData.language.map(_.toUpperCase).headOption match + case Some(language) + if Set(Languages.PYTHONSRC, Languages.PYTHON, "PY").contains(language) => + Right(PythonDependencyParser.parse(cpg)) + case Some(language) => + Left(s"'$language' is not yet supported for the `parsedeps` command") + case _ => Left("Unable to extract atom language") - trait XDependencyParser: - def parse(cpg: Cpg): 
DependencySlice + trait XDependencyParser: + def parse(cpg: Cpg): DependencySlice - implicit val dependencySliceEncoder: Decoder[DependencySlice] = - (c: HCursor) => - for - modules <- c.downField("modules").as[List[ModuleWithVersion]] - yield DependencySlice(modules) - implicit val dependencySliceDecoder: Encoder[DependencySlice] = - Encoder.instance { case DependencySlice(modules) => - Json.obj("modules" -> modules.asJson) - } + implicit val dependencySliceEncoder: Decoder[DependencySlice] = + (c: HCursor) => + for + modules <- c.downField("modules").as[List[ModuleWithVersion]] + yield DependencySlice(modules) + implicit val dependencySliceDecoder: Encoder[DependencySlice] = + Encoder.instance { case DependencySlice(modules) => + Json.obj("modules" -> modules.asJson) + } - trait AtomSlice: - def toJson: String + trait AtomSlice: + def toJson: String - implicit val moduleWithVersionEncoder: Encoder[ModuleWithVersion] = - Encoder.forProduct4("name", "version", "versionSpecifiers", "importedSymbols")(x => - (x.name, x.version, x.versionSpecifiers, x.importedSymbols) - ) - implicit val moduleWithVersionDecoder: Decoder[ModuleWithVersion] = - Decoder.forProduct4("name", "version", "versionSpecifiers", "importedSymbols")( - ModuleWithVersion.apply - ) + implicit val moduleWithVersionEncoder: Encoder[ModuleWithVersion] = + Encoder.forProduct4("name", "version", "versionSpecifiers", "importedSymbols")(x => + (x.name, x.version, x.versionSpecifiers, x.importedSymbols) + ) + implicit val moduleWithVersionDecoder: Decoder[ModuleWithVersion] = + Decoder.forProduct4("name", "version", "versionSpecifiers", "importedSymbols")( + ModuleWithVersion.apply + ) - case class DependencySlice(modules: Seq[ModuleWithVersion]) extends AtomSlice: - override def toJson: String = this.asJson.noSpaces + case class DependencySlice(modules: Seq[ModuleWithVersion]) extends AtomSlice: + override def toJson: String = this.asJson.noSpaces - case class ModuleWithVersion( - name: String, - version: 
String = "", - versionSpecifiers: String = "", - importedSymbols: String = "" - ): + case class ModuleWithVersion( + name: String, + version: String = "", + versionSpecifiers: String = "", + importedSymbols: String = "" + ): - def merge(x: ModuleWithVersion): ModuleWithVersion = - val vs = this.versions ++ x.versions - val is = if x.importedSymbols.nonEmpty then - this.importedSymbols + "," + x.importedSymbols - else this.importedSymbols - vs.find(_.startsWith("==")) match - case Some(exactVersion) => - ModuleWithVersion( - name, - exactVersion.stripPrefix("=="), - (vs.diff(Set(exactVersion))).mkString(","), - importedSymbols = is - ) - case None => ModuleWithVersion( - name, - versionSpecifiers = vs.mkString(","), - importedSymbols = is - ) + def merge(x: ModuleWithVersion): ModuleWithVersion = + val vs = this.versions ++ x.versions + val is = if x.importedSymbols.nonEmpty then + this.importedSymbols + "," + x.importedSymbols + else this.importedSymbols + vs.find(_.startsWith("==")) match + case Some(exactVersion) => + ModuleWithVersion( + name, + exactVersion.stripPrefix("=="), + (vs.diff(Set(exactVersion))).mkString(","), + importedSymbols = is + ) + case None => ModuleWithVersion( + name, + versionSpecifiers = vs.mkString(","), + importedSymbols = is + ) - def versions: Set[String] = - (if !version.isBlank then Set(s"==$version") else Set.empty) ++ versionSpecifiers.split( - ',' - ).filterNot(_.isBlank) - end ModuleWithVersion + def versions: Set[String] = + (if !version.isBlank then Set(s"==$version") else Set.empty) ++ versionSpecifiers.split( + ',' + ).filterNot(_.isBlank) + end ModuleWithVersion end parsedeps diff --git a/src/main/scala/io/appthreat/atom/passes/DataDepsPass.scala b/src/main/scala/io/appthreat/atom/passes/DataDepsPass.scala index 3756c58..eab2c23 100644 --- a/src/main/scala/io/appthreat/atom/passes/DataDepsPass.scala +++ b/src/main/scala/io/appthreat/atom/passes/DataDepsPass.scala @@ -13,35 +13,35 @@ import scala.collection.mutable class 
DataDepsPass(cpg: Cpg, maxNumberOfDefinitions: Int = 2000)(implicit s: Semantics) extends SafeConcurrentCpgPass[Method](cpg): - // If there are any regex method full names, load them early - s.loadRegexSemantics(cpg) - - override def generateParts(): Array[Method] = cpg.method.toArray - - override def runOnPart(dstGraph: DiffGraphBuilder, method: Method): Unit = - val problem = ReachingDefProblem.create(method) - if shouldBailOut(method, problem) then - return - - val solution = new DataFlowSolver().calculateMopSolutionForwards(problem) - val ddgGenerator = new DdgGenerator(s) - ddgGenerator.addReachingDefEdges(dstGraph, method, problem, solution) - - /** Before we start propagating definitions in the graph, which is the bulk of the work, we - * check how many definitions were are dealing with in total. If a threshold is reached, we - * bail out instead, leaving reaching definitions uncalculated for the method in question. - * Users can increase the threshold if desired. - */ - private def shouldBailOut( - method: Method, - problem: DataFlowProblem[StoredNode, mutable.BitSet] - ): Boolean = - val transferFunction = problem.transferFunction.asInstanceOf[ReachingDefTransferFunction] - // For each node, the `gen` map contains the list of definitions it generates - // We add up the sizes of these lists to obtain the total number of definitions - val numberOfDefinitions = transferFunction.gen.foldLeft(0)(_ + _._2.size) - if numberOfDefinitions > maxNumberOfDefinitions then - true - else - false + // If there are any regex method full names, load them early + s.loadRegexSemantics(cpg) + + override def generateParts(): Array[Method] = cpg.method.toArray + + override def runOnPart(dstGraph: DiffGraphBuilder, method: Method): Unit = + val problem = ReachingDefProblem.create(method) + if shouldBailOut(method, problem) then + return + + val solution = new DataFlowSolver().calculateMopSolutionForwards(problem) + val ddgGenerator = new DdgGenerator(s) + 
ddgGenerator.addReachingDefEdges(dstGraph, method, problem, solution) + + /** Before we start propagating definitions in the graph, which is the bulk of the work, we check + * how many definitions were are dealing with in total. If a threshold is reached, we bail out + * instead, leaving reaching definitions uncalculated for the method in question. Users can + * increase the threshold if desired. + */ + private def shouldBailOut( + method: Method, + problem: DataFlowProblem[StoredNode, mutable.BitSet] + ): Boolean = + val transferFunction = problem.transferFunction.asInstanceOf[ReachingDefTransferFunction] + // For each node, the `gen` map contains the list of definitions it generates + // We add up the sizes of these lists to obtain the total number of definitions + val numberOfDefinitions = transferFunction.gen.foldLeft(0)(_ + _._2.size) + if numberOfDefinitions > maxNumberOfDefinitions then + true + else + false end DataDepsPass diff --git a/src/main/scala/io/appthreat/atom/passes/SafeConcurrentCpgPass.scala b/src/main/scala/io/appthreat/atom/passes/SafeConcurrentCpgPass.scala index 689d997..b8fd93d 100644 --- a/src/main/scala/io/appthreat/atom/passes/SafeConcurrentCpgPass.scala +++ b/src/main/scala/io/appthreat/atom/passes/SafeConcurrentCpgPass.scala @@ -13,87 +13,87 @@ import scala.concurrent.{Await, ExecutionContext, Future} /** SafeConcurrentCpgPass is a modified version of ConcurrentWriterCpgPass */ object SafeConcurrentCpgPass: - private val producerQueueCapacity = Runtime.getRuntime.availableProcessors() / 2 - private val writerQueueCapacity = Math.max(Math.floor(producerQueueCapacity / 2).toInt, 2) + private val producerQueueCapacity = Runtime.getRuntime.availableProcessors() / 2 + private val writerQueueCapacity = Math.max(Math.floor(producerQueueCapacity / 2).toInt, 2) abstract class SafeConcurrentCpgPass[T <: AnyRef]( cpg: Cpg, @nowarn outName: String = "", keyPool: Option[KeyPool] = None ) extends NewStyleCpgPassBase[T]: - @volatile private var 
nDiffT: Int = -1 + @volatile private var nDiffT: Int = -1 - override def createApplySerializeAndStore( - serializedCpg: SerializedCpg, - inverse: Boolean = false, - prefix: String = "" - ): Unit = - import SafeConcurrentCpgPass.producerQueueCapacity - var nDiff = 0 - var completedParts = 0 - nDiffT = -1 - init() - val parts = generateParts() - val nParts = parts.length - val partIter = parts.iterator - val completionQueue = - mutable.ArrayDeque[Future[overflowdb.BatchedUpdate.DiffGraphBuilder]]() - val writer = new Writer() - val writerThread = new Thread(writer) - writerThread.setName("Writer") - writerThread.start() - implicit val ec: ExecutionContext = ExecutionContextProvider.getExecutionContext - var done = false + override def createApplySerializeAndStore( + serializedCpg: SerializedCpg, + inverse: Boolean = false, + prefix: String = "" + ): Unit = + import SafeConcurrentCpgPass.producerQueueCapacity + var nDiff = 0 + var completedParts = 0 + nDiffT = -1 + init() + val parts = generateParts() + val nParts = parts.length + val partIter = parts.iterator + val completionQueue = + mutable.ArrayDeque[Future[overflowdb.BatchedUpdate.DiffGraphBuilder]]() + val writer = new Writer() + val writerThread = new Thread(writer) + writerThread.setName("Writer") + writerThread.start() + implicit val ec: ExecutionContext = ExecutionContextProvider.getExecutionContext + var done = false + try + while !done || completedParts < nParts do + if completionQueue.size < producerQueueCapacity && partIter.hasNext then + val next = partIter.next() + completionQueue.prepend(Future.apply { + val builder = new DiffGraphBuilder + runOnPart(builder, next.asInstanceOf[T]) + builder + }) + else if completionQueue.nonEmpty then + val future = completionQueue.removeLast() + val res = Await.result(future, Duration.Inf).build() + nDiff += res.size + writer.queue.put(Some(res)) + completedParts += 1 + else + writer.queue.put(None) + completedParts += 1 + done = true + finally try - while !done || 
completedParts < nParts do - if completionQueue.size < producerQueueCapacity && partIter.hasNext then - val next = partIter.next() - completionQueue.prepend(Future.apply { - val builder = new DiffGraphBuilder - runOnPart(builder, next.asInstanceOf[T]) - builder - }) - else if completionQueue.nonEmpty then - val future = completionQueue.removeLast() - val res = Await.result(future, Duration.Inf).build() - nDiff += res.size - writer.queue.put(Some(res)) - completedParts += 1 - else - writer.queue.put(None) - completedParts += 1 - done = true + writerThread.join() finally - try - writerThread.join() - finally - finish() - end try - end createApplySerializeAndStore + finish() + end try + end createApplySerializeAndStore - private class Writer() extends Runnable: + private class Writer() extends Runnable: - val queue = - new LinkedBlockingQueue[Option[overflowdb.BatchedUpdate.DiffGraph]]( - SafeConcurrentCpgPass.writerQueueCapacity - ) + val queue = + new LinkedBlockingQueue[Option[overflowdb.BatchedUpdate.DiffGraph]]( + SafeConcurrentCpgPass.writerQueueCapacity + ) - override def run(): Unit = - var terminate = false - var index: Int = 0 - nDiffT = 0 - var hadErrors = false - while !terminate do - try - queue.take() match - case None => - terminate = true - case Some(diffGraph) => - nDiffT += overflowdb.BatchedUpdate - .applyDiff(cpg.graph, diffGraph, keyPool.orNull, null) - .transitiveModifications() - index += 1 - finally - hadErrors = true - end Writer + override def run(): Unit = + var terminate = false + var index: Int = 0 + nDiffT = 0 + var hadErrors = false + while !terminate do + try + queue.take() match + case None => + terminate = true + case Some(diffGraph) => + nDiffT += overflowdb.BatchedUpdate + .applyDiff(cpg.graph, diffGraph, keyPool.orNull, null) + .transitiveModifications() + index += 1 + finally + hadErrors = true + end Writer end SafeConcurrentCpgPass diff --git a/src/main/scala/io/appthreat/atom/passes/TypeHintPass.scala 
b/src/main/scala/io/appthreat/atom/passes/TypeHintPass.scala index 172fa45..9d8fee0 100644 --- a/src/main/scala/io/appthreat/atom/passes/TypeHintPass.scala +++ b/src/main/scala/io/appthreat/atom/passes/TypeHintPass.scala @@ -8,11 +8,11 @@ import overflowdb.traversal.Traversal class TypeHintPass(cpg: Cpg) extends XTypeHintCallLinker(cpg): - override protected val pathSep = ':' + override protected val pathSep = ':' - override protected def calls: Traversal[Call] = cpg.call - .or(_.nameNot(".*", ".*"), _.name(".new")) - .filterNot(c => - c.code.startsWith("$(") || c.code.startsWith("_tmp_") || c.code.startsWith("{") - ) - .filter(c => calleeNames(c).nonEmpty && c.callee.isEmpty) + override protected def calls: Traversal[Call] = cpg.call + .or(_.nameNot(".*", ".*"), _.name(".new")) + .filterNot(c => + c.code.startsWith("$(") || c.code.startsWith("_tmp_") || c.code.startsWith("{") + ) + .filter(c => calleeNames(c).nonEmpty && c.callee.isEmpty) diff --git a/src/main/scala/io/appthreat/atom/slicing/DataFlowSlicing.scala b/src/main/scala/io/appthreat/atom/slicing/DataFlowSlicing.scala index 13cf74e..bc7e04a 100644 --- a/src/main/scala/io/appthreat/atom/slicing/DataFlowSlicing.scala +++ b/src/main/scala/io/appthreat/atom/slicing/DataFlowSlicing.scala @@ -12,140 +12,140 @@ import scala.collection.concurrent.TrieMap class DataFlowSlicing: - implicit val resolver: ICallResolver = NoResolve - protected val exec: ExecutorService = - Executors.newVirtualThreadPerTaskExecutor() - private val excludeOperatorCalls = new AtomicBoolean(true) - private val nodeCache = new TrieMap[Long, SliceNode]() - private var language: Option[String] = scala.compiletime.uninitialized + implicit val resolver: ICallResolver = NoResolve + protected val exec: ExecutorService = + Executors.newVirtualThreadPerTaskExecutor() + private val excludeOperatorCalls = new AtomicBoolean(true) + private val nodeCache = new TrieMap[Long, SliceNode]() + private var language: Option[String] = 
scala.compiletime.uninitialized - def calculateDataFlowSlice(atom: Cpg, config: DataFlowConfig): Option[DataFlowSlice] = - language = atom.metaData.language.headOption - excludeOperatorCalls.set(config.excludeOperatorCalls) + def calculateDataFlowSlice(atom: Cpg, config: DataFlowConfig): Option[DataFlowSlice] = + language = atom.metaData.language.headOption + excludeOperatorCalls.set(config.excludeOperatorCalls) - val dataFlowSlice = (config.fileFilter match - case Some(fileRegex) => atom.call.where(_.file.name(fileRegex)) - case None => atom.call - ) - .where(c => c.callee.isExternal) - .flatMap { - case c - if excludeOperatorCalls.get() && (c.name.startsWith( - " - None - case c => Some(c) - } - .map(c => exec.submit(new TrackDataFlowTask(config, c))) - .flatMap(TimedGet) - .reduceOption { (a, b) => DataFlowSlice(a.nodes ++ b.nodes, a.edges ++ b.edges) } - nodeCache.clear() - dataFlowSlice - end calculateDataFlowSlice + val dataFlowSlice = (config.fileFilter match + case Some(fileRegex) => atom.call.where(_.file.name(fileRegex)) + case None => atom.call + ) + .where(c => c.callee.isExternal) + .flatMap { + case c + if excludeOperatorCalls.get() && (c.name.startsWith( + " + None + case c => Some(c) + } + .map(c => exec.submit(new TrackDataFlowTask(config, c))) + .flatMap(TimedGet) + .reduceOption { (a, b) => DataFlowSlice(a.nodes ++ b.nodes, a.edges ++ b.edges) } + nodeCache.clear() + dataFlowSlice + end calculateDataFlowSlice - protected def TimedGet(dsf: Future[Option[DataFlowSlice]]) = - try - dsf.get(5, TimeUnit.SECONDS) - catch - case _: Throwable => None + protected def TimedGet(dsf: Future[Option[DataFlowSlice]]) = + try + dsf.get(5, TimeUnit.SECONDS) + catch + case _: Throwable => None - /** Convert cfg node to a sliceable node with backing cache - */ - protected def fromCfgNode(cfgNode: CfgNode): SliceNode = - nodeCache.getOrElseUpdate(cfgNode.id(), cfgNodeToSliceNode(cfgNode)) + /** Convert cfg node to a sliceable node with backing cache + */ + protected 
def fromCfgNode(cfgNode: CfgNode): SliceNode = + nodeCache.getOrElseUpdate(cfgNode.id(), cfgNodeToSliceNode(cfgNode)) - protected def cfgNodeToSliceNode(cfgNode: CfgNode): SliceNode = - val sliceNode = SliceNode( - cfgNode.id(), - cfgNode.label, - code = cfgNode.code, - parentMethodName = cfgNode.method.fullName, - parentMethodSignature = cfgNode.method.signature, - parentFileName = cfgNode.file.name.headOption.getOrElse(""), - parentPackageName = cfgNode.method.location.packageName, - parentClassName = cfgNode.method.location.className, - lineNumber = cfgNode.lineNumber, - columnNumber = cfgNode.columnNumber, - tags = if cfgNode.tag.nonEmpty then cfgNode.tag.name.mkString(", ") else "" - ) - cfgNode match - case n: Call => - sliceNode.copy( - name = n.name, - fullName = n.methodFullName, - isExternal = n.callee.isExternal.headOption.getOrElse(false), - signature = n.callee.signature.headOption.getOrElse(""), - typeFullName = n.typeFullName - ) - case n: Method => - sliceNode.copy( - name = n.name, - fullName = n.fullName, - isExternal = n.isExternal, - signature = n.signature, - typeFullName = n.methodReturn.typeFullName - ) - case n: Return => - sliceNode.copy(name = "RET", typeFullName = n.method.methodReturn.typeFullName) - case n: MethodRef => sliceNode.copy(name = n.methodFullName, code = n.code) - case n: TypeRef => sliceNode.copy(name = n.typeFullName, code = n.code) - case n: Block => - var typeFullName = n.property(PropertyNames.TYPE_FULL_NAME, "") - if typeFullName == "ANY" then - if n.code.startsWith("[") then typeFullName = "Array" - else if n.code.startsWith("{") then typeFullName = "Object" - sliceNode.copy( - name = n.property(PropertyNames.NAME, ""), - fullName = n.property(PropertyNames.FULL_NAME, ""), - isExternal = n.property(PropertyNames.IS_EXTERNAL, false), - typeFullName = typeFullName, - signature = n.property(PropertyNames.SIGNATURE, "") - ) - case n: Identifier => - var typeFullName = n.property(PropertyNames.TYPE_FULL_NAME, "") - if 
typeFullName == "ANY" then - if n.code.startsWith("[") then typeFullName = "Array" - else if n.code.startsWith("{") then typeFullName = "Object" - sliceNode.copy( - name = n.property(PropertyNames.NAME, ""), - fullName = n.property(PropertyNames.FULL_NAME, ""), - isExternal = n.property(PropertyNames.IS_EXTERNAL, false), - typeFullName = typeFullName, - signature = n.property(PropertyNames.SIGNATURE, "") - ) - case n => - sliceNode.copy( - name = n.property(PropertyNames.NAME, ""), - fullName = n.property(PropertyNames.FULL_NAME, ""), - isExternal = n.property(PropertyNames.IS_EXTERNAL, false), - typeFullName = n.property(PropertyNames.TYPE_FULL_NAME, ""), - signature = n.property(PropertyNames.SIGNATURE, "") - ) - end match - end cfgNodeToSliceNode + protected def cfgNodeToSliceNode(cfgNode: CfgNode): SliceNode = + val sliceNode = SliceNode( + cfgNode.id(), + cfgNode.label, + code = cfgNode.code, + parentMethodName = cfgNode.method.fullName, + parentMethodSignature = cfgNode.method.signature, + parentFileName = cfgNode.file.name.headOption.getOrElse(""), + parentPackageName = cfgNode.method.location.packageName, + parentClassName = cfgNode.method.location.className, + lineNumber = cfgNode.lineNumber, + columnNumber = cfgNode.columnNumber, + tags = if cfgNode.tag.nonEmpty then cfgNode.tag.name.mkString(", ") else "" + ) + cfgNode match + case n: Call => + sliceNode.copy( + name = n.name, + fullName = n.methodFullName, + isExternal = n.callee.isExternal.headOption.getOrElse(false), + signature = n.callee.signature.headOption.getOrElse(""), + typeFullName = n.typeFullName + ) + case n: Method => + sliceNode.copy( + name = n.name, + fullName = n.fullName, + isExternal = n.isExternal, + signature = n.signature, + typeFullName = n.methodReturn.typeFullName + ) + case n: Return => + sliceNode.copy(name = "RET", typeFullName = n.method.methodReturn.typeFullName) + case n: MethodRef => sliceNode.copy(name = n.methodFullName, code = n.code) + case n: TypeRef => 
sliceNode.copy(name = n.typeFullName, code = n.code) + case n: Block => + var typeFullName = n.property(PropertyNames.TYPE_FULL_NAME, "") + if typeFullName == "ANY" then + if n.code.startsWith("[") then typeFullName = "Array" + else if n.code.startsWith("{") then typeFullName = "Object" + sliceNode.copy( + name = n.property(PropertyNames.NAME, ""), + fullName = n.property(PropertyNames.FULL_NAME, ""), + isExternal = n.property(PropertyNames.IS_EXTERNAL, false), + typeFullName = typeFullName, + signature = n.property(PropertyNames.SIGNATURE, "") + ) + case n: Identifier => + var typeFullName = n.property(PropertyNames.TYPE_FULL_NAME, "") + if typeFullName == "ANY" then + if n.code.startsWith("[") then typeFullName = "Array" + else if n.code.startsWith("{") then typeFullName = "Object" + sliceNode.copy( + name = n.property(PropertyNames.NAME, ""), + fullName = n.property(PropertyNames.FULL_NAME, ""), + isExternal = n.property(PropertyNames.IS_EXTERNAL, false), + typeFullName = typeFullName, + signature = n.property(PropertyNames.SIGNATURE, "") + ) + case n => + sliceNode.copy( + name = n.property(PropertyNames.NAME, ""), + fullName = n.property(PropertyNames.FULL_NAME, ""), + isExternal = n.property(PropertyNames.IS_EXTERNAL, false), + typeFullName = n.property(PropertyNames.TYPE_FULL_NAME, ""), + signature = n.property(PropertyNames.SIGNATURE, "") + ) + end match + end cfgNodeToSliceNode - private class TrackDataFlowTask(config: DataFlowConfig, c: Call) - extends Callable[Option[DataFlowSlice]]: - override def call(): Option[DataFlowSlice] = - val sinks = - config.sinkPatternFilter.map(filter => c.argument.code(filter).l).getOrElse( - c.argument.filterNot(_.isBlock).l - ) - // Slow operation - val sliceNodes = sinks.repeat(_.ddgIn)(_.maxDepth(config.sliceDepth).emit).dedup.l - // This is required to create paths - val sliceNodesIdSet = sliceNodes.id.toSet - // Lazily set up the rest if the filters are satisfied - lazy val sliceEdges = sliceNodes - .flatMap(_.outE) - 
.filter(x => sliceNodesIdSet.contains(x.inNode().id())) - .map { e => SliceEdge(e.outNode().id(), e.inNode().id(), e.label()) } - .toSet - lazy val slice = Option(DataFlowSlice(sliceNodes.map(fromCfgNode).toSet, sliceEdges)) - if sliceNodes.isEmpty || sliceNodes.size > config.sliceNodesLimit then None else slice - end TrackDataFlowTask + private class TrackDataFlowTask(config: DataFlowConfig, c: Call) + extends Callable[Option[DataFlowSlice]]: + override def call(): Option[DataFlowSlice] = + val sinks = + config.sinkPatternFilter.map(filter => c.argument.code(filter).l).getOrElse( + c.argument.filterNot(_.isBlock).l + ) + // Slow operation + val sliceNodes = sinks.repeat(_.ddgIn)(_.maxDepth(config.sliceDepth).emit).dedup.l + // This is required to create paths + val sliceNodesIdSet = sliceNodes.id.toSet + // Lazily set up the rest if the filters are satisfied + lazy val sliceEdges = sliceNodes + .flatMap(_.outE) + .filter(x => sliceNodesIdSet.contains(x.inNode().id())) + .map { e => SliceEdge(e.outNode().id(), e.inNode().id(), e.label()) } + .toSet + lazy val slice = Option(DataFlowSlice(sliceNodes.map(fromCfgNode).toSet, sliceEdges)) + if sliceNodes.isEmpty || sliceNodes.size > config.sliceNodesLimit then None else slice + end TrackDataFlowTask end DataFlowSlicing diff --git a/src/main/scala/io/appthreat/atom/slicing/ReachableSlicing.scala b/src/main/scala/io/appthreat/atom/slicing/ReachableSlicing.scala index 6f93897..1df118f 100644 --- a/src/main/scala/io/appthreat/atom/slicing/ReachableSlicing.scala +++ b/src/main/scala/io/appthreat/atom/slicing/ReachableSlicing.scala @@ -14,322 +14,323 @@ import scala.collection.mutable.ArrayBuffer object ReachableSlicing: - implicit val semantics: Semantics = DefaultSemantics() - private val engineConfig: EngineConfig = EngineConfig() - implicit val context: EngineContext = EngineContext(semantics, engineConfig) - private def API_TAG = "api" - private def FRAMEWORK_TAG = "framework" + implicit val semantics: Semantics = 
DefaultSemantics() + private val engineConfig: EngineConfig = EngineConfig() + implicit val context: EngineContext = EngineContext(semantics, engineConfig) + private def API_TAG = "api" + private def FRAMEWORK_TAG = "framework" - private def LIBRARY_CALL_TAG = "library-call" - private def CLI_SOURCE_TAG = "cli-source" - private def DRIVER_SOURCE_TAG = "driver-source" - private def HTTP_TAG = "http" - private def CRYPTO_GENERATE_TAG = "crypto-generate" - private def CRYPTO_ALGORITHM_TAG = "crypto-algorithm" + private def LIBRARY_CALL_TAG = "library-call" + private def CLI_SOURCE_TAG = "cli-source" + private def DRIVER_SOURCE_TAG = "driver-source" + private def HTTP_TAG = "http" + private def CRYPTO_GENERATE_TAG = "crypto-generate" + private def CRYPTO_ALGORITHM_TAG = "crypto-algorithm" - def calculateReachableSlice(atom: Cpg, config: ReachablesConfig): ReachableSlice = - val language = atom.metaData.language.head - def sourceP = atom.tag.name(config.sourceTag).parameter - def sourceI = atom.tag.name(config.sourceTag).identifier - def sink = atom.ret.where(_.tag.name(config.sinkTag)) - var flowsList = sink.reachableByFlows(sourceP, sourceI).map(toSlice).toList - if flowsList.isEmpty then - flowsList = atom.ret.where(_.method.tag.name(config.sourceTag)).reachableByFlows( - sourceP, - sourceI - ).map(toSlice).toList - flowsList ++= - atom.tag.name(API_TAG).parameter.reachableByFlows(atom.tag.name(API_TAG).parameter).map( - toSlice - ).toList - if config.includeCryptoFlows then - if language == Languages.JAVA || language == Languages.JAVASRC then - flowsList ++= atom.tag.name(CRYPTO_GENERATE_TAG).call.reachableByFlows( - atom.tag.name(CRYPTO_ALGORITHM_TAG).literal - ).map(toSlice).toList - else if language == Languages.PYTHON || language == Languages.PYTHONSRC then - flowsList ++= atom.tag.name(CRYPTO_GENERATE_TAG).call.reachableByFlows( - atom.tag.name(CRYPTO_ALGORITHM_TAG).call - ).map(toSlice).toList - // For JavaScript and Python, we need flows between arguments of 
call nodes to track callbacks and middlewares - if - language == Languages.JSSRC || language == Languages.JAVASCRIPT || language == Languages.PYTHON || language == Languages.PYTHONSRC - then - def dynCallSource = atom.tag.name(config.sourceTag).call.argument.isIdentifier - def dynFrameworkIdentifier = atom.tag.name(FRAMEWORK_TAG).identifier - def dynFrameworkParameter = atom.tag.name(FRAMEWORK_TAG).parameter - def dynSink = atom.tag.name(config.sinkTag).call.argument.isIdentifier - flowsList ++= dynSink - .reachableByFlows(dynCallSource, dynFrameworkIdentifier, dynFrameworkParameter) - .map(toSlice) - .toList - flowsList ++= atom.tag - .name(FRAMEWORK_TAG) - .call - .argument - .reachableByFlows(dynFrameworkParameter, sourceP) - .map(toSlice) - .toList - flowsList ++= atom.tag - .name(FRAMEWORK_TAG) - .call - .argument - .isIdentifier - .reachableByFlows(sourceI, dynFrameworkIdentifier) - .map(toSlice) - .toList - if language == Languages.PYTHON || language == Languages.PYTHONSRC then - flowsList ++= atom.tag.name("pkg.*").identifier.reachableByFlows( - atom.tag.name(CLI_SOURCE_TAG).identifier - ).map(toSlice).toList - else - flowsList ++= atom.tag.name("pkg.*").identifier.reachableByFlows( - atom.tag.name(CLI_SOURCE_TAG).call - ).map(toSlice).toList - end if - if language == Languages.PHP - then - flowsList ++= atom.ret.where(_.tag.name(config.sinkTag)).reachableByFlows( - atom.tag.name(config.sourceTag).parameter - ).map(toSlice).toList - flowsList ++= atom.tag.name(FRAMEWORK_TAG).parameter.reachableByFlows( - atom.tag.name(config.sourceTag).parameter - ).map(toSlice).toList - if language == Languages.NEWC || language == Languages.C - then - flowsList ++= atom.tag.name(LIBRARY_CALL_TAG).call.reachableByFlows(atom.tag.name( - CLI_SOURCE_TAG - ).parameter).map(toSlice).toList - flowsList ++= atom.tag.name(HTTP_TAG).parameter.reachableByFlows(atom.tag.name( - CLI_SOURCE_TAG - ).parameter).map(toSlice).toList - flowsList ++= 
atom.tag.name(HTTP_TAG).parameter.reachableByFlows(atom.tag.name( - HTTP_TAG - ).parameter).map(toSlice).toList - flowsList ++= atom.tag.name(LIBRARY_CALL_TAG).call.reachableByFlows(atom.tag.name( - DRIVER_SOURCE_TAG - ).parameter).map(toSlice).toList - // Fallback to reverse reachability if we don't get any hits - if flowsList.isEmpty then - println( - s"Falling back to using reverse reachability to determine flows. Max DDG depth used: ${config.sliceDepth}" - ) - flowsList ++= atom.tag.name(LIBRARY_CALL_TAG).call.reachableByFlows( - atom.tag.name( - LIBRARY_CALL_TAG - ).call.method.repeat(_.caller(NoResolve))( - _.maxDepth(config.sliceDepth) - ).parameter - ).map(toSlice).toList - // We still have nothing. Is there any http flows going on? - if flowsList.isEmpty then - flowsList ++= atom.tag.name(HTTP_TAG).parameter.reachableByFlows( - atom.tag.name( - HTTP_TAG - ).parameter.method.repeat(_.caller(NoResolve))( - _.until(_.method.parameter.tag.name(CLI_SOURCE_TAG)) - ).parameter - ).map(toSlice).toList - if flowsList.isEmpty then - flowsList ++= atom.tag.name(LIBRARY_CALL_TAG).parameter.reachableByFlows( - atom.tag.name( - LIBRARY_CALL_TAG - ).parameter.method.repeat(_.caller(NoResolve))( - _.until(_.method.parameter.tag.name(config.sourceTag)) - ).parameter - ).map(toSlice).toList - end if - ReachableSlice(flowsList) - end calculateReachableSlice + def calculateReachableSlice(atom: Cpg, config: ReachablesConfig): ReachableSlice = + val language = atom.metaData.language.head + def sourceP = atom.tag.name(config.sourceTag).parameter + def sourceI = atom.tag.name(config.sourceTag).identifier + def sink = atom.ret.where(_.tag.name(config.sinkTag)) + var flowsList = sink.reachableByFlows(sourceP, sourceI).map(toSlice).toList + if flowsList.isEmpty then + flowsList = atom.ret.where(_.method.tag.name(config.sourceTag)).reachableByFlows( + sourceP, + sourceI + ).map(toSlice).toList + flowsList ++= + 
atom.tag.name(API_TAG).parameter.reachableByFlows(atom.tag.name(API_TAG).parameter).map( + toSlice + ).toList + if config.includeCryptoFlows then + if language == Languages.JAVA || language == Languages.JAVASRC then + flowsList ++= atom.tag.name(CRYPTO_GENERATE_TAG).call.reachableByFlows( + atom.tag.name(CRYPTO_ALGORITHM_TAG).literal + ).map(toSlice).toList + else if language == Languages.PYTHON || language == Languages.PYTHONSRC then + flowsList ++= atom.tag.name(CRYPTO_GENERATE_TAG).call.reachableByFlows( + atom.tag.name(CRYPTO_ALGORITHM_TAG).call + ).map(toSlice).toList + // For JavaScript and Python, we need flows between arguments of call nodes to track callbacks and middlewares + if + language == Languages.JSSRC || language == Languages.JAVASCRIPT || language == Languages + .PYTHON || language == Languages.PYTHONSRC + then + def dynCallSource = atom.tag.name(config.sourceTag).call.argument.isIdentifier + def dynFrameworkIdentifier = atom.tag.name(FRAMEWORK_TAG).identifier + def dynFrameworkParameter = atom.tag.name(FRAMEWORK_TAG).parameter + def dynSink = atom.tag.name(config.sinkTag).call.argument.isIdentifier + flowsList ++= dynSink + .reachableByFlows(dynCallSource, dynFrameworkIdentifier, dynFrameworkParameter) + .map(toSlice) + .toList + flowsList ++= atom.tag + .name(FRAMEWORK_TAG) + .call + .argument + .reachableByFlows(dynFrameworkParameter, sourceP) + .map(toSlice) + .toList + flowsList ++= atom.tag + .name(FRAMEWORK_TAG) + .call + .argument + .isIdentifier + .reachableByFlows(sourceI, dynFrameworkIdentifier) + .map(toSlice) + .toList + if language == Languages.PYTHON || language == Languages.PYTHONSRC then + flowsList ++= atom.tag.name("pkg.*").identifier.reachableByFlows( + atom.tag.name(CLI_SOURCE_TAG).identifier + ).map(toSlice).toList + else + flowsList ++= atom.tag.name("pkg.*").identifier.reachableByFlows( + atom.tag.name(CLI_SOURCE_TAG).call + ).map(toSlice).toList + end if + if language == Languages.PHP + then + flowsList ++= 
atom.ret.where(_.tag.name(config.sinkTag)).reachableByFlows( + atom.tag.name(config.sourceTag).parameter + ).map(toSlice).toList + flowsList ++= atom.tag.name(FRAMEWORK_TAG).parameter.reachableByFlows( + atom.tag.name(config.sourceTag).parameter + ).map(toSlice).toList + if language == Languages.NEWC || language == Languages.C + then + flowsList ++= atom.tag.name(LIBRARY_CALL_TAG).call.reachableByFlows(atom.tag.name( + CLI_SOURCE_TAG + ).parameter).map(toSlice).toList + flowsList ++= atom.tag.name(HTTP_TAG).parameter.reachableByFlows(atom.tag.name( + CLI_SOURCE_TAG + ).parameter).map(toSlice).toList + flowsList ++= atom.tag.name(HTTP_TAG).parameter.reachableByFlows(atom.tag.name( + HTTP_TAG + ).parameter).map(toSlice).toList + flowsList ++= atom.tag.name(LIBRARY_CALL_TAG).call.reachableByFlows(atom.tag.name( + DRIVER_SOURCE_TAG + ).parameter).map(toSlice).toList + // Fallback to reverse reachability if we don't get any hits + if flowsList.isEmpty then + println( + s"Falling back to using reverse reachability to determine flows. Max DDG depth used: ${config.sliceDepth}" + ) + flowsList ++= atom.tag.name(LIBRARY_CALL_TAG).call.reachableByFlows( + atom.tag.name( + LIBRARY_CALL_TAG + ).call.method.repeat(_.caller(NoResolve))( + _.maxDepth(config.sliceDepth) + ).parameter + ).map(toSlice).toList + // We still have nothing. Is there any http flows going on? 
+ if flowsList.isEmpty then + flowsList ++= atom.tag.name(HTTP_TAG).parameter.reachableByFlows( + atom.tag.name( + HTTP_TAG + ).parameter.method.repeat(_.caller(NoResolve))( + _.until(_.method.parameter.tag.name(CLI_SOURCE_TAG)) + ).parameter + ).map(toSlice).toList + if flowsList.isEmpty then + flowsList ++= atom.tag.name(LIBRARY_CALL_TAG).parameter.reachableByFlows( + atom.tag.name( + LIBRARY_CALL_TAG + ).parameter.method.repeat(_.caller(NoResolve))( + _.until(_.method.parameter.tag.name(config.sourceTag)) + ).parameter + ).map(toSlice).toList + end if + ReachableSlice(flowsList) + end calculateReachableSlice - private def tagAsString(tag: Iterator[Tag]): String = - if tag.nonEmpty then - tag.name.filterNot(v => v.toUpperCase() == v && v.contains("_")).mkString(", ") - else "" - private def purlsFromTag(tag: Iterator[Tag]) = - if tag.nonEmpty then tag.name.filter(_.startsWith("pkg:")).toSet else Set.empty + private def tagAsString(tag: Iterator[Tag]): String = + if tag.nonEmpty then + tag.name.filterNot(v => v.toUpperCase() == v && v.contains("_")).mkString(", ") + else "" + private def purlsFromTag(tag: Iterator[Tag]) = + if tag.nonEmpty then tag.name.filter(_.startsWith("pkg:")).toSet else Set.empty - private def toSlice(path: Path) = - val tableRows = ArrayBuffer[SliceNode]() - val addedPaths = mutable.Set[String]() - val purls = mutable.Set[String]() - path.elements.foreach { astNode => - val lineNumber = astNode.lineNumber.map(_.intValue()) - val fileName = astNode.file.name.headOption.getOrElse("").replace("", "") - var fileLocation = s"$fileName#$lineNumber" - var tags: String = tagAsString(astNode.tag) - purls ++= purlsFromTag(astNode.tag) - if fileLocation == "#" then fileLocation = "N/A" - var sliceNode = SliceNode( - astNode.id(), - astNode.label, - code = astNode.code, - parentFileName = astNode.file.name.headOption.getOrElse(""), - lineNumber = astNode.lineNumber, - columnNumber = astNode.columnNumber, + private def toSlice(path: Path) = + val 
tableRows = ArrayBuffer[SliceNode]() + val addedPaths = mutable.Set[String]() + val purls = mutable.Set[String]() + path.elements.foreach { astNode => + val lineNumber = astNode.lineNumber.map(_.intValue()) + val fileName = astNode.file.name.headOption.getOrElse("").replace("", "") + var fileLocation = s"$fileName#$lineNumber" + var tags: String = tagAsString(astNode.tag) + purls ++= purlsFromTag(astNode.tag) + if fileLocation == "#" then fileLocation = "N/A" + var sliceNode = SliceNode( + astNode.id(), + astNode.label, + code = astNode.code, + parentFileName = astNode.file.name.headOption.getOrElse(""), + lineNumber = astNode.lineNumber, + columnNumber = astNode.columnNumber, + tags = tags + ) + astNode match + case _: MethodReturn => + case _: Block => + case methodParameterIn: MethodParameterIn => + val methodName = methodParameterIn.method.name + if tags.isEmpty && methodParameterIn.method.tag.nonEmpty then + tags = tagAsString(methodParameterIn.method.tag) + purls ++= purlsFromTag(methodParameterIn.method.tag) + if tags.isEmpty && methodParameterIn.tag.nonEmpty then + tags = tagAsString(methodParameterIn.tag) + purls ++= purlsFromTag(methodParameterIn.tag) + sliceNode = sliceNode.copy( + name = methodParameterIn.name, + code = methodParameterIn.code, + typeFullName = methodParameterIn.typeFullName, + parentMethodName = methodName, + parentMethodSignature = methodParameterIn.method.signature, + parentPackageName = methodParameterIn.method.location.packageName, + parentClassName = methodParameterIn.method.location.className, + isExternal = methodParameterIn.method.isExternal, + lineNumber = methodParameterIn.lineNumber, + columnNumber = methodParameterIn.columnNumber, tags = tags ) - astNode match - case _: MethodReturn => - case _: Block => - case methodParameterIn: MethodParameterIn => - val methodName = methodParameterIn.method.name - if tags.isEmpty && methodParameterIn.method.tag.nonEmpty then - tags = tagAsString(methodParameterIn.method.tag) - purls ++= 
purlsFromTag(methodParameterIn.method.tag) - if tags.isEmpty && methodParameterIn.tag.nonEmpty then - tags = tagAsString(methodParameterIn.tag) - purls ++= purlsFromTag(methodParameterIn.tag) - sliceNode = sliceNode.copy( - name = methodParameterIn.name, - code = methodParameterIn.code, - typeFullName = methodParameterIn.typeFullName, - parentMethodName = methodName, - parentMethodSignature = methodParameterIn.method.signature, - parentPackageName = methodParameterIn.method.location.packageName, - parentClassName = methodParameterIn.method.location.className, - isExternal = methodParameterIn.method.isExternal, - lineNumber = methodParameterIn.lineNumber, - columnNumber = methodParameterIn.columnNumber, - tags = tags - ) - tableRows += sliceNode - case ret: Return => - val methodName = ret.method.name - sliceNode = sliceNode.copy( - name = ret.argumentName.getOrElse(""), - code = ret.code, - parentMethodName = methodName, - parentMethodSignature = ret.method.signature, - parentPackageName = ret.method.location.packageName, - parentClassName = ret.method.location.className, - lineNumber = ret.lineNumber, - columnNumber = ret.columnNumber - ) - tableRows += sliceNode - case literal: Literal => - val methodName = literal.method.name - if tags.isEmpty && literal.inCall.nonEmpty && literal.inCall.head.tag.nonEmpty - then - tags = tagAsString(literal.inCall.head.tag) - purls ++= purlsFromTag(literal.inCall.head.tag) - if !addedPaths.contains( - s"$fileName#$lineNumber" - ) - then - sliceNode = sliceNode.copy( - name = literal.code.replaceAll("""(['"])""", ""), - code = literal.code.replaceAll("""(['"])""", ""), - typeFullName = literal.typeFullName, - parentMethodName = methodName, - parentMethodSignature = literal.method.signature, - parentPackageName = literal.method.location.packageName, - parentClassName = literal.method.location.className, - lineNumber = literal.lineNumber, - columnNumber = literal.columnNumber, - tags = tags - ) - tableRows += sliceNode - case 
identifier: Identifier => - val methodName = identifier.method.name - if tags.isEmpty && identifier.inCall.nonEmpty && identifier.inCall.head.tag.nonEmpty - then - tags = tagAsString(identifier.inCall.head.tag) - purls ++= purlsFromTag(identifier.inCall.head.tag) - if !addedPaths.contains( - s"$fileName#$lineNumber" - ) && identifier.inCall.nonEmpty - then - sliceNode = sliceNode.copy( - name = identifier.name, - code = - if identifier.inCall.nonEmpty then identifier.inCall.head.code - else identifier.code, - parentMethodName = methodName, - parentMethodSignature = identifier.method.signature, - parentPackageName = identifier.method.location.packageName, - parentClassName = identifier.method.location.className, - lineNumber = identifier.lineNumber, - columnNumber = identifier.columnNumber, - tags = tags - ) - tableRows += sliceNode - case member: Member => - val methodName = "" - sliceNode = sliceNode.copy( - name = member.name, - code = member.code, - parentMethodName = methodName + tableRows += sliceNode + case ret: Return => + val methodName = ret.method.name + sliceNode = sliceNode.copy( + name = ret.argumentName.getOrElse(""), + code = ret.code, + parentMethodName = methodName, + parentMethodSignature = ret.method.signature, + parentPackageName = ret.method.location.packageName, + parentClassName = ret.method.location.className, + lineNumber = ret.lineNumber, + columnNumber = ret.columnNumber + ) + tableRows += sliceNode + case literal: Literal => + val methodName = literal.method.name + if tags.isEmpty && literal.inCall.nonEmpty && literal.inCall.head.tag.nonEmpty + then + tags = tagAsString(literal.inCall.head.tag) + purls ++= purlsFromTag(literal.inCall.head.tag) + if !addedPaths.contains( + s"$fileName#$lineNumber" + ) + then + sliceNode = sliceNode.copy( + name = literal.code.replaceAll("""(['"])""", ""), + code = literal.code.replaceAll("""(['"])""", ""), + typeFullName = literal.typeFullName, + parentMethodName = methodName, + parentMethodSignature = 
literal.method.signature, + parentPackageName = literal.method.location.packageName, + parentClassName = literal.method.location.className, + lineNumber = literal.lineNumber, + columnNumber = literal.columnNumber, + tags = tags + ) + tableRows += sliceNode + case identifier: Identifier => + val methodName = identifier.method.name + if tags.isEmpty && identifier.inCall.nonEmpty && identifier.inCall.head.tag.nonEmpty + then + tags = tagAsString(identifier.inCall.head.tag) + purls ++= purlsFromTag(identifier.inCall.head.tag) + if !addedPaths.contains( + s"$fileName#$lineNumber" + ) && identifier.inCall.nonEmpty + then + sliceNode = sliceNode.copy( + name = identifier.name, + code = + if identifier.inCall.nonEmpty then identifier.inCall.head.code + else identifier.code, + parentMethodName = methodName, + parentMethodSignature = identifier.method.signature, + parentPackageName = identifier.method.location.packageName, + parentClassName = identifier.method.location.className, + lineNumber = identifier.lineNumber, + columnNumber = identifier.columnNumber, + tags = tags + ) + tableRows += sliceNode + case member: Member => + val methodName = "" + sliceNode = sliceNode.copy( + name = member.name, + code = member.code, + parentMethodName = methodName + ) + tableRows += sliceNode + case call: Call => + if !call.code.startsWith(" - if !call.code.startsWith(" - val method = cfgNode.method - if tags.isEmpty && method.tag.nonEmpty then - tags = tagAsString(method.tag) - purls ++= purlsFromTag(method.tag) - val methodName = method.name - val statement = cfgNode match - case _: MethodParameterIn => - if tags.isEmpty && method.parameter.tag.nonEmpty then - tags = tagAsString(method.parameter.tag) - purls ++= purlsFromTag(method.parameter.tag) - val paramsPretty = - method.parameter.toList.sortBy(_.index).map(_.code).mkString(", ") - s"$methodName($paramsPretty)" - case _ => - if tags.isEmpty && cfgNode.statement.tag.nonEmpty then - tags = tagAsString(cfgNode.statement.tag) - purls 
++= purlsFromTag(cfgNode.statement.tag) - cfgNode.statement.repr - sliceNode = - sliceNode.copy(parentMethodName = methodName, code = statement, tags = tags) - tableRows += sliceNode - end match - addedPaths += s"$fileName#$lineNumber" - } - ReachableFlows(flows = tableRows.toList, purls = purls.toSet) - end toSlice + then true + else false + if call.methodFullName.startsWith(" + val method = cfgNode.method + if tags.isEmpty && method.tag.nonEmpty then + tags = tagAsString(method.tag) + purls ++= purlsFromTag(method.tag) + val methodName = method.name + val statement = cfgNode match + case _: MethodParameterIn => + if tags.isEmpty && method.parameter.tag.nonEmpty then + tags = tagAsString(method.parameter.tag) + purls ++= purlsFromTag(method.parameter.tag) + val paramsPretty = + method.parameter.toList.sortBy(_.index).map(_.code).mkString(", ") + s"$methodName($paramsPretty)" + case _ => + if tags.isEmpty && cfgNode.statement.tag.nonEmpty then + tags = tagAsString(cfgNode.statement.tag) + purls ++= purlsFromTag(cfgNode.statement.tag) + cfgNode.statement.repr + sliceNode = + sliceNode.copy(parentMethodName = methodName, code = statement, tags = tags) + tableRows += sliceNode + end match + addedPaths += s"$fileName#$lineNumber" + } + ReachableFlows(flows = tableRows.toList, purls = purls.toSet) + end toSlice end ReachableSlicing diff --git a/src/main/scala/io/appthreat/atom/slicing/UsageSlicing.scala b/src/main/scala/io/appthreat/atom/slicing/UsageSlicing.scala index c5716ba..d144e01 100644 --- a/src/main/scala/io/appthreat/atom/slicing/UsageSlicing.scala +++ b/src/main/scala/io/appthreat/atom/slicing/UsageSlicing.scala @@ -18,645 +18,644 @@ import scala.util.Try */ object UsageSlicing: - private val resolver = NoResolve - val exec: ExecutorService = - Executors.newVirtualThreadPerTaskExecutor() - private val constructorTypeMatcher = Pattern.compile(".*new (\\w+)\\(.*") - private val excludeOperatorCalls = new AtomicBoolean(true) - private val FRAMEWORK_ROUTE = 
"framework-route" - - /** Generates object slices from the given CPG. - * - * @param atom - * the atom to slice. - * @return - * a set of object slices. - */ - def calculateUsageSlice(atom: Cpg, config: UsagesConfig): ProgramSlice = - implicit val implicitConfig: UsagesConfig = config - excludeOperatorCalls.set(config.excludeOperatorCalls) - - def getDeclarations: Traversal[Declaration] = (config.fileFilter match - case Some(fileName) => atom.file.nameExact(fileName).method - case None => atom.method - ).withMethodNameFilter.withMethodParameterFilter.withMethodAnnotationFilter.declaration - - def typeMap = TrieMap.from(atom.typeDecl.map(f => (f.name, f.fullName)).toMap) - val slices = usageSlices(atom, () => getDeclarations, typeMap) - val language = atom.metaData.language.headOption - val userDefTypes = userDefinedTypes(atom) - if language.get == Languages.NEWC || language.get == Languages.C || language.get == Languages.PHP - then - ProgramUsageSlice(slices ++ importsAsSlices(atom), userDefTypes ++ routesAsUDT(atom)) - else if language.get == Languages.PYTHON || language.get == Languages.PYTHONSRC - then - ProgramUsageSlice( - slices ++ externalCalleesAsSlices(atom, typeMap), - userDefTypes ++ routesAsUDT(atom) + private val resolver = NoResolve + val exec: ExecutorService = + Executors.newVirtualThreadPerTaskExecutor() + private val constructorTypeMatcher = Pattern.compile(".*new (\\w+)\\(.*") + private val excludeOperatorCalls = new AtomicBoolean(true) + private val FRAMEWORK_ROUTE = "framework-route" + + /** Generates object slices from the given CPG. + * + * @param atom + * the atom to slice. + * @return + * a set of object slices. 
+ */ + def calculateUsageSlice(atom: Cpg, config: UsagesConfig): ProgramSlice = + implicit val implicitConfig: UsagesConfig = config + excludeOperatorCalls.set(config.excludeOperatorCalls) + + def getDeclarations: Traversal[Declaration] = (config.fileFilter match + case Some(fileName) => atom.file.nameExact(fileName).method + case None => atom.method + ).withMethodNameFilter.withMethodParameterFilter.withMethodAnnotationFilter.declaration + + def typeMap = TrieMap.from(atom.typeDecl.map(f => (f.name, f.fullName)).toMap) + val slices = usageSlices(atom, () => getDeclarations, typeMap) + val language = atom.metaData.language.headOption + val userDefTypes = userDefinedTypes(atom) + if language.get == Languages.NEWC || language.get == Languages.C || language.get == Languages.PHP + then + ProgramUsageSlice(slices ++ importsAsSlices(atom), userDefTypes ++ routesAsUDT(atom)) + else if language.get == Languages.PYTHON || language.get == Languages.PYTHONSRC + then + ProgramUsageSlice( + slices ++ externalCalleesAsSlices(atom, typeMap), + userDefTypes ++ routesAsUDT(atom) + ) + else + ProgramUsageSlice(slices ++ unusedTypeDeclAsSlices(atom), userDefTypes) + end calculateUsageSlice + + import io.shiftleft.semanticcpg.codedumper.CodeDumper.dump + + private def usageSlices( + atom: Cpg, + getDeclIdentifiers: () => Traversal[Declaration], + typeMap: TrieMap[String, String] + )(implicit config: UsagesConfig): List[MethodUsageSlice] = + val language = atom.metaData.language.headOption + val root = atom.metaData.root.headOption + getDeclIdentifiers() + .to(LazyList) + .filterNot(a => a.name.equals("*")) + .filter(a => !a.name.startsWith("_tmp_") && atLeastNCalls(a, config.minNumCalls)) + .map(a => exec.submit(new TrackUsageTask(atom, a, typeMap))) + .flatMap(TimedGet) + .groupBy { case (scope, _) => scope } + .view + .filterNot((m, _) => + m.fullName.startsWith("" + ) || m.name.startsWith("") + ) + .sortBy(_._1.fullName) + .map { case (method, slices) => + MethodUsageSlice( + code 
= + if config.excludeMethodSource || !better.files.File(method.filename).exists + then "" + else + Try(dump( + method.location, + language, + root, + highlight = false, + withArrow = false + )).getOrElse("") + , + fullName = method.fullName, + signature = method.signature, + fileName = method.filename, + slices = slices.iterator.map(_._2).toSet, + lineNumber = method.lineNumber.map(_.intValue()), + columnNumber = method.columnNumber.map(_.intValue()) ) - else - ProgramUsageSlice(slices ++ unusedTypeDeclAsSlices(atom), userDefTypes) - end calculateUsageSlice - - import io.shiftleft.semanticcpg.codedumper.CodeDumper.dump - - private def usageSlices( - atom: Cpg, - getDeclIdentifiers: () => Traversal[Declaration], - typeMap: TrieMap[String, String] - )(implicit config: UsagesConfig): List[MethodUsageSlice] = - val language = atom.metaData.language.headOption - val root = atom.metaData.root.headOption - getDeclIdentifiers() - .to(LazyList) - .filterNot(a => a.name.equals("*")) - .filter(a => !a.name.startsWith("_tmp_") && atLeastNCalls(a, config.minNumCalls)) - .map(a => exec.submit(new TrackUsageTask(atom, a, typeMap))) - .flatMap(TimedGet) - .groupBy { case (scope, _) => scope } - .view - .filterNot((m, _) => - m.fullName.startsWith("" - ) || m.name.startsWith("") + } + .toList + end usageSlices + + private def cleanupImportCode(code: String) = + if code.startsWith("use") then code else code.replaceAll("\\s*", "") + + private def importLineNumber(im: Import) = + if im.file.nonEmpty && im.file.head.lineNumber.nonEmpty then + im.file.head.lineNumber.map(_.intValue()) + else if im.lineNumber.nonEmpty then im.lineNumber.map(_.intValue()) + else None + + private def importColumnNumber(im: Import) = + if im.file.nonEmpty && im.file.head.columnNumber.nonEmpty then + im.file.head.columnNumber.map(_.intValue()) + else if im.columnNumber.nonEmpty then im.columnNumber.map(_.intValue()) + else None + + private def importsAsSlices(atom: Cpg): List[MethodUsageSlice] = + 
atom.imports.l.map(im => + MethodUsageSlice( + code = if im.code.nonEmpty then cleanupImportCode(im.code) else "", + fullName = im.importedEntity.get, + signature = im.importedAs.getOrElse(""), + fileName = if im.file.nonEmpty then im.file.head.name else "", + slices = Seq[ObjectUsageSlice]().toSet, + lineNumber = importLineNumber(im), + columnNumber = importColumnNumber(im) + ) + ) + + private def unusedTypeDeclAsSlices(atom: Cpg): List[MethodUsageSlice] = + atom.typeDecl.annotation.filter(_.method.isEmpty).l.map(im => + MethodUsageSlice( + code = if im.code.nonEmpty then im.code.replaceAll("\\s*", "") else "", + fullName = im.fullName, + signature = s"@${im.name}", + fileName = if im.file.nonEmpty then im.file.head.name else "", + slices = Seq[ObjectUsageSlice]().toSet, + lineNumber = + im.lineNumber.map(_.intValue()), + columnNumber = + im.columnNumber.map(_.intValue()) + ) + ) + + private def externalCalleesAsSlices( + atom: Cpg, + typeMap: TrieMap[String, String] + ): List[MethodUsageSlice] = + atom.call + .where(_.callee(NoResolve).isExternal) + .filterNot(_.name.startsWith(" + val taobj = CallDef( + if call.callee(NoResolve).method.nonEmpty then + call.callee(NoResolve).method.head.name + else "", + "", + if call.callee(NoResolve).method.nonEmpty then + Option(call.callee(NoResolve).method.head.fullName) + else Option(""), + Option(call.callee(NoResolve).head.isExternal), + call.callee(NoResolve).head.method.lineNumber.map(_.intValue()), + call.callee(NoResolve).head.method.columnNumber.map(_.intValue()) + ) + val ocall = List( + ObservedCall( + callName = call.name, + resolvedMethod = + if call.callee(NoResolve).method.nonEmpty then + Option(call.callee(NoResolve).method.head.fullName) + else Option(""), + paramTypes = List.empty[String], + returnType = "", + isExternal = Option(true), + lineNumber = call.lineNumber.map(_.intValue()), + columnNumber = call.columnNumber.map(_.intValue()) + ) ) - .sortBy(_._1.fullName) - .map { case (method, slices) => - 
MethodUsageSlice( - code = - if config.excludeMethodSource || !better.files.File(method.filename).exists - then "" - else - Try(dump( - method.location, - language, - root, - highlight = false, - withArrow = false - )).getOrElse("") - , - fullName = method.fullName, - signature = method.signature, - fileName = method.filename, - slices = slices.iterator.map(_._2).toSet, - lineNumber = method.lineNumber.map(_.intValue()), - columnNumber = method.columnNumber.map(_.intValue()) - ) - } - .toList - end usageSlices - - private def cleanupImportCode(code: String) = - if code.startsWith("use") then code else code.replaceAll("\\s*", "") - - private def importLineNumber(im: Import) = - if im.file.nonEmpty && im.file.head.lineNumber.nonEmpty then - im.file.head.lineNumber.map(_.intValue()) - else if im.lineNumber.nonEmpty then im.lineNumber.map(_.intValue()) - else None - - private def importColumnNumber(im: Import) = - if im.file.nonEmpty && im.file.head.columnNumber.nonEmpty then - im.file.head.columnNumber.map(_.intValue()) - else if im.columnNumber.nonEmpty then im.columnNumber.map(_.intValue()) - else None - - private def importsAsSlices(atom: Cpg): List[MethodUsageSlice] = - atom.imports.l.map(im => MethodUsageSlice( - code = if im.code.nonEmpty then cleanupImportCode(im.code) else "", - fullName = im.importedEntity.get, - signature = im.importedAs.getOrElse(""), - fileName = if im.file.nonEmpty then im.file.head.name else "", - slices = Seq[ObjectUsageSlice]().toSet, - lineNumber = importLineNumber(im), - columnNumber = importColumnNumber(im) + code = "", + fullName = call.method.fullName, + signature = call.method.signature, + fileName = call.method.filename, + slices = Set( + ObjectUsageSlice( + targetObj = taobj, + definedBy = Option(taobj), + invokedCalls = ocall, + argToCalls = List.empty[ObservedCallWithArgPos] + ) + ), + lineNumber = call.method.lineNumber.map(_.intValue()), + columnNumber = call.method.columnNumber.map(_.intValue()) ) + ) + + /** Discovers 
internally defined routes. + * + * @param atom + * the CPG to query for types. + * @return + * a list of user defined types. + */ + def routesAsUDT(atom: Cpg): List[UserDefinedType] = + + def generateUDT(call: Call): UserDefinedType = + UserDefinedType( + call.name, + call.argument.isLiteral + .map(m => + LocalDef( + name = m.code, + typeFullName = m.typeFullName, + lineNumber = Option( + m.property(new PropertyKey[Integer](PropertyNames.LINE_NUMBER)) + ).map(_.toInt), + columnNumber = Option( + m.property(new PropertyKey[Integer](PropertyNames.COLUMN_NUMBER)) + ).map(_.toInt) + ) + ) + .collectAll[LocalDef] + .l, + call + .callee(NoResolve) + .method + .filterNot(m => m.name.startsWith("")) + .map(m => + ObservedCall( + m.name, + Option(m.fullName), + m.parameter.map(_.typeFullName).toList, + m.methodReturn.typeFullName, + Option(m.isExternal), + m.lineNumber.map(_.intValue()), + m.columnNumber.map(_.intValue()) + ) + ) + .l ++ call.argument.inCall.map(c => + ObservedCall( + c.code.takeWhile(_ != '('), + Option(c.code), + c.argument.map(_.code.replaceAll("\"", "")).toList, + "", + Option(true), + c.lineNumber.map(_.intValue()), + c.columnNumber.map(_.intValue()) + ) + ).l, + call.location.filename, + call.lineNumber.map(_.intValue()), + call.columnNumber.map(_.intValue()) + ) + end generateUDT + + atom.call + .where(_.argument.tag.nameExact(FRAMEWORK_ROUTE)) + .map(generateUDT) + .filter(udt => udt.fields.nonEmpty || udt.procedures.nonEmpty) + .l + end routesAsUDT + + private def TimedGet(dsf: Future[Option[(Method, ObjectUsageSlice)]]) = + try + dsf.get(5, TimeUnit.SECONDS) + catch + case _: Throwable => None + + /** Returns true if the given declaration is found to have at least n non-operator calls within + * its referenced identifiers' scope. + * + * @param decl + * the declaration to check. + * @param n + * number of calls. + * @return + * true if the call count condition is satisfied. 
+ */ + private def atLeastNCalls(decl: Declaration, n: Int): Boolean = + decl.label == "METHOD" || decl.name.contains("init") || getInCallsForReferencedIdentifiers( + decl + ).size >= n + + private def getInCallsForReferencedIdentifiers(decl: Declaration): List[Call] = + // Cross closure boundaries + val capturedVars = + decl.capturedByMethodRef.referencedMethod.ast.isIdentifier.nameExact(decl.name) + decl + .flatMap { + case local: Local => local.referencingIdentifiers ++ capturedVars + case param: MethodParameterIn => param.referencingIdentifiers ++ capturedVars + case m: Method => m.callIn.argument.isIdentifier + case _ => Seq() + } + .inCall + .flatMap { + case c + if c.name.startsWith(Operators.assignment) && c.ast.isCall.name( + Operators.alloc + ).nonEmpty => Some(c) + case c if excludeOperatorCalls.get() && c.name.startsWith(" None + case c => Some(c) + } + .dedup + .toList + end getInCallsForReferencedIdentifiers + + /** Discovers internally defined types. + * + * @param atom + * the CPG to query for types. + * @return + * a list of user defined types. 
+ */ + def userDefinedTypes(atom: Cpg): List[UserDefinedType] = + + def generateUDT(typeDecl: TypeDecl): UserDefinedType = + UserDefinedType( + typeDecl.fullName, + typeDecl.member.map(m => DefComponent.fromNode(m, null)).collectAll[LocalDef].l, + typeDecl.method + .filterNot(m => m.name.startsWith("")) + .map(m => + ObservedCall( + m.name, + Option(m.fullName), + m.parameter.map(_.typeFullName).toList, + m.methodReturn.typeFullName, + Option(m.isExternal), + m.lineNumber.map(_.intValue()), + m.columnNumber.map(_.intValue()) + ) + ) + .l, + typeDecl.filename, + typeDecl.lineNumber.map(_.intValue()), + typeDecl.columnNumber.map(_.intValue()) ) - private def unusedTypeDeclAsSlices(atom: Cpg): List[MethodUsageSlice] = - atom.typeDecl.annotation.filter(_.method.isEmpty).l.map(im => - MethodUsageSlice( - code = if im.code.nonEmpty then im.code.replaceAll("\\s*", "") else "", - fullName = im.fullName, - signature = s"@${im.name}", - fileName = if im.file.nonEmpty then im.file.head.name else "", - slices = Seq[ObjectUsageSlice]().toSet, - lineNumber = - im.lineNumber.map(_.intValue()), - columnNumber = - im.columnNumber.map(_.intValue()) + atom.typeDecl + .filterNot(t => + t.isExternal || t.name.matches( + "(:program||||||)" ) ) - - private def externalCalleesAsSlices( - atom: Cpg, - typeMap: TrieMap[String, String] - ): List[MethodUsageSlice] = - atom.call - .where(_.callee(NoResolve).isExternal) - .filterNot(_.name.startsWith(" - val taobj = CallDef( - if call.callee(NoResolve).method.nonEmpty then - call.callee(NoResolve).method.head.name - else "", - "", - if call.callee(NoResolve).method.nonEmpty then - Option(call.callee(NoResolve).method.head.fullName) - else Option(""), - Option(call.callee(NoResolve).head.isExternal), - call.callee(NoResolve).head.method.lineNumber.map(_.intValue()), - call.callee(NoResolve).head.method.columnNumber.map(_.intValue()) + .map(generateUDT) + .filter(udt => udt.fields.nonEmpty || udt.procedures.nonEmpty) + .l + end userDefinedTypes + 
+ private class TrackUsageTask(atom: Cpg, tgt: Declaration, typeMap: TrieMap[String, String])( + implicit config: UsagesConfig + ) extends Callable[Option[(Method, ObjectUsageSlice)]]: + + override def call(): Option[(Method, ObjectUsageSlice)] = + val defNode = tgt match + case local: Local => + local.referencingIdentifiers.inCall.astParent.assignment + .where(_.argument(1).code(tgt.name)) + .argument(2) + .headOption match + // In the case of a constructor, we should get the "new" call + case Some(block: Block) => + block.ast.isCall.or( + _.nameExact(".new"), + _.name(".*__init__.*") + ).lastOption + case x => x + case x => Some(x) + + (tgt, defNode, partitionInvolvementInCalls) match + // Case 1: Generated by variable assignment + case (local: Local, Some(genCall: Call), (invokedCalls, argToCalls)) => + Option( + ( + local.method.head, + ObjectUsageSlice( + targetObj = createDefComponent(local, genCall), + definedBy = Option(createDefComponent(genCall, null)), + invokedCalls = invokedCalls, + argToCalls = argToCalls ) - val ocall = List( - ObservedCall( - callName = call.name, - resolvedMethod = - if call.callee(NoResolve).method.nonEmpty then - Option(call.callee(NoResolve).method.head.fullName) - else Option(""), - paramTypes = List.empty[String], - returnType = "", - isExternal = Option(true), - lineNumber = call.lineNumber.map(_.intValue()), - columnNumber = call.columnNumber.map(_.intValue()) - ) + ) + ) + // Case 2: Generated by incoming parameter + case (param: MethodParameterIn, _, (invokedCalls, argToCalls)) + if !param.name.matches("(this|self)") => + Option( + ( + param.method, + ObjectUsageSlice( + targetObj = createDefComponent(param, null), + definedBy = Option(createDefComponent(param, null)), + invokedCalls = invokedCalls, + argToCalls = argToCalls ) - MethodUsageSlice( - code = "", - fullName = call.method.fullName, - signature = call.method.signature, - fileName = call.method.filename, - slices = Set( - ObjectUsageSlice( - targetObj = taobj, - 
definedBy = Option(taobj), - invokedCalls = ocall, - argToCalls = List.empty[ObservedCallWithArgPos] + ) + ) + case (m: Method, _, (invokedCalls, argToCalls)) => + var method = m + val defComp = createDefComponent(m, null) + if method.filename == "" && defComp.label == "CALL" && method.callIn.nonEmpty + then + method = method.callIn.head.method + val annotationCalls = m.annotation + .map(a => + ObservedCall( + if a.fullName.nonEmpty then a.fullName else a.name, + if a.code.nonEmpty then Option(a.code) else Option(a.fullName), + List.empty, + "", + Option(m.isExternal), + a.lineNumber.map(_.intValue()), + a.columnNumber.map(_.intValue()) ) - ), - lineNumber = call.method.lineNumber.map(_.intValue()), - columnNumber = call.method.columnNumber.map(_.intValue()) ) + .toList + Option( + method, + ObjectUsageSlice( + targetObj = defComp, + definedBy = Option(defComp), + invokedCalls = invokedCalls ++ annotationCalls, + argToCalls = argToCalls + ) ) + case _ => + None + end match + end call + + private def partitionInvolvementInCalls: (List[ObservedCall], List[ObservedCallWithArgPos]) = + val (invokedCalls, argToCalls) = getInCallsForReferencedIdentifiers(tgt) + .sortBy(f => (f.lineNumber, f.columnNumber)) + .flatMap(c => c.argument.find(p => p.code == tgt.name).map(f => (c, f)).headOption) + .map { case (c, arg) => + if arg.argumentName.isDefined then (c, arg, Left(arg.argumentName.get)) + else (c, arg, Right(arg.argumentIndex)) + } + .partition { case (_, _, argIdx) => + argIdx match + case Left(_) => false // receivers/bases are never named + case Right(argIdx) => argIdx == 0 + } + ( + invokedCalls.map(_._1).isCall.flatMap(exprToObservedCall).toList, + argToCalls + .flatMap { case (c: Call, _, argAt: Either[String, Int]) => + exprToObservedCall(c).map(oc => + ObservedCallWithArgPos.fromObservedCall(oc, argAt) + ) + } + ) + end partitionInvolvementInCalls - /** Discovers internally defined routes. 
+ /** Will attempt to get the API call from the expression if this is a procedure call. * - * @param atom - * the CPG to query for types. + * @param baseCall + * the expression to extract the API call from. * @return - * a list of user defined types. + * an API call if present. */ - def routesAsUDT(atom: Cpg): List[UserDefinedType] = - - def generateUDT(call: Call): UserDefinedType = - UserDefinedType( - call.name, - call.argument.isLiteral - .map(m => - LocalDef( - name = m.code, - typeFullName = m.typeFullName, - lineNumber = Option( - m.property(new PropertyKey[Integer](PropertyNames.LINE_NUMBER)) - ).map(_.toInt), - columnNumber = Option( - m.property(new PropertyKey[Integer](PropertyNames.COLUMN_NUMBER)) - ).map(_.toInt) - ) - ) - .collectAll[LocalDef] - .l, - call - .callee(NoResolve) - .method - .filterNot(m => m.name.startsWith("")) - .map(m => - ObservedCall( - m.name, - Option(m.fullName), - m.parameter.map(_.typeFullName).toList, - m.methodReturn.typeFullName, - Option(m.isExternal), - m.lineNumber.map(_.intValue()), - m.columnNumber.map(_.intValue()) - ) - ) - .l ++ call.argument.inCall.map(c => - ObservedCall( - c.code.takeWhile(_ != '('), - Option(c.code), - c.argument.map(_.code.replaceAll("\"", "")).toList, - "", - Option(true), - c.lineNumber.map(_.intValue()), - c.columnNumber.map(_.intValue()) - ) - ).l, - call.location.filename, - call.lineNumber.map(_.intValue()), - call.columnNumber.map(_.intValue()) - ) - end generateUDT - - atom.call - .where(_.argument.tag.nameExact(FRAMEWORK_ROUTE)) - .map(generateUDT) - .filter(udt => udt.fields.nonEmpty || udt.procedures.nonEmpty) - .l - end routesAsUDT - - private def TimedGet(dsf: Future[Option[(Method, ObjectUsageSlice)]]) = - try - dsf.get(5, TimeUnit.SECONDS) - catch - case _: Throwable => None - - /** Returns true if the given declaration is found to have at least n non-operator calls within - * its referenced identifiers' scope. - * - * @param decl - * the declaration to check. 
- * @param n - * number of calls. - * @return - * true if the call count condition is satisfied. - */ - private def atLeastNCalls(decl: Declaration, n: Int): Boolean = - decl.label == "METHOD" || decl.name.contains("init") || getInCallsForReferencedIdentifiers( - decl - ).size >= n - - private def getInCallsForReferencedIdentifiers(decl: Declaration): List[Call] = - // Cross closure boundaries - val capturedVars = - decl.capturedByMethodRef.referencedMethod.ast.isIdentifier.nameExact(decl.name) - decl - .flatMap { - case local: Local => local.referencingIdentifiers ++ capturedVars - case param: MethodParameterIn => param.referencingIdentifiers ++ capturedVars - case m: Method => m.callIn.argument.isIdentifier - case _ => Seq() - } - .inCall + private def exprToObservedCall(baseCall: Call): Option[ObservedCall] = + val language = atom.metaData.language.headOption + val isMemberInvocation = baseCall.name.equals(Operators.fieldAccess) + val isConstructor = + baseCall.name.equals(Operators.alloc) || baseCall.ast.isCall.nameExact( + Operators.alloc + ).nonEmpty + + def getResolvedMethod(x: Call): Option[String] = + if + DefComponent.unresolvedCallPattern.matcher(x.methodFullName).matches() + then None + else Option(x.methodFullName) + + // Handle the case where a call is an invocation of a field member (lambda) or function/method call + var (callName, resolvedMethod): (Option[String], Option[String]) = + if isMemberInvocation then + baseCall.argumentOut + .flatMap { + case x: FieldIdentifier => + Option(Option(x.code) -> None) + case x: Call => Option(Option(x.name) -> getResolvedMethod(x)) + case _ => None + } + .headOption + .getOrElse((None, None)) + else if isConstructor then + val m = constructorTypeMatcher.matcher(baseCall.code) + val typeName = + if m.find() then m.group(1) + else baseCall.code.stripPrefix("new ").takeWhile(!_.equals('(')) + Option(typeName) -> typeMap.get(typeName) + else Option(baseCall.name) -> getResolvedMethod(baseCall) + + if 
callName.isEmpty then return None + + val params = (if isMemberInvocation then baseCall.inCall.argument + else if isConstructor then + baseCall.ast.isCall + .nameExact(".new") + .lastOption + .map(_.argument) + .getOrElse(Iterator.empty) + else baseCall.argument) + .collect { case n: Expression if n.argumentIndex > 0 => n } + .flatMap { + case _: MethodRef => Option("LAMBDA") + case x => + Option( + x.property( + PropertyNames.TYPE_FULL_NAME, + x.property(PropertyNames.DYNAMIC_TYPE_HINT_FULL_NAME, Seq("ANY")).headOption + ) + ) + } + .collect { case x: String => x } + .toList + // Not sure how we can get the return type unless it's typescript or we can resolve the callee? + val returnType = if isConstructor then + resolvedMethod match + case Some(methodFullName) => methodFullName + case None => "ANY" + else + baseCall.argumentOut .flatMap { - case c - if c.name.startsWith(Operators.assignment) && c.ast.isCall.name( - Operators.alloc - ).nonEmpty => Some(c) - case c if excludeOperatorCalls.get() && c.name.startsWith(" None - case c => Some(c) + case x: Call + if !DefComponent.unresolvedCallPattern.matcher( + x.methodFullName + ).matches() => + atom.method.fullNameExact( + x.methodFullName + ).methodReturn.typeFullName.headOption + case x: Call => + x.callee(resolver).methodReturn.typeFullName.headOption + case _ => None } - .dedup - .toList - end getInCallsForReferencedIdentifiers - - /** Discovers internally defined types. - * - * @param atom - * the CPG to query for types. - * @return - * a list of user defined types. 
- */ - def userDefinedTypes(atom: Cpg): List[UserDefinedType] = - - def generateUDT(typeDecl: TypeDecl): UserDefinedType = - UserDefinedType( - typeDecl.fullName, - typeDecl.member.map(m => DefComponent.fromNode(m, null)).collectAll[LocalDef].l, - typeDecl.method - .filterNot(m => m.name.startsWith("")) - .map(m => - ObservedCall( - m.name, - Option(m.fullName), - m.parameter.map(_.typeFullName).toList, - m.methodReturn.typeFullName, - Option(m.isExternal), - m.lineNumber.map(_.intValue()), - m.columnNumber.map(_.intValue()) - ) - ) - .l, - typeDecl.filename, - typeDecl.lineNumber.map(_.intValue()), - typeDecl.columnNumber.map(_.intValue()) + .headOption + .getOrElse("ANY") + // If resolvedMethod is null then use the code property to construct the method name + if baseCall.code.nonEmpty && baseCall.code.contains( + "(" + ) && language.get == Languages.JSSRC + then + var baseCallCode = baseCall.code.takeWhile(_ != '(') + if baseCallCode.contains(" ") then + baseCallCode = baseCallCode.split(" ").last + // Retain the full code for route detection purposes + if language.get == Languages.JSSRC then + if baseCallCode.startsWith( + "app.use" ) + then + if baseCall.argument.nonEmpty && baseCall.argument.isLiteral.nonEmpty + then + baseCallCode = + baseCall.argument.isLiteral.filterNot(_.code == "*").head.code + else if baseCallCode.startsWith( + "route" + ) || baseCallCode.startsWith("app") + then + baseCallCode = baseCall.code + .replaceAll("\n", "\\n").replaceAll( + " {4}", + " {2}" + ).replaceAll(" {2}", "\\t") + end if + resolvedMethod = Option(baseCallCode) + end if + Option( + ObservedCall( + callName.get, + resolvedMethod, + params, + returnType, + baseCall.callee(resolver).isExternal.headOption, + baseCall.lineNumber.map(_.intValue()), + baseCall.columnNumber.map(_.intValue()) + ) + ) + end exprToObservedCall - atom.typeDecl - .filterNot(t => - t.isExternal || t.name.matches( - "(:program||||||)" - ) - ) - .map(generateUDT) - .filter(udt => udt.fields.nonEmpty || 
udt.procedures.nonEmpty) - .l - end userDefinedTypes - - private class TrackUsageTask(atom: Cpg, tgt: Declaration, typeMap: TrieMap[String, String])( - implicit config: UsagesConfig - ) extends Callable[Option[(Method, ObjectUsageSlice)]]: - - override def call(): Option[(Method, ObjectUsageSlice)] = - val defNode = tgt match - case local: Local => - local.referencingIdentifiers.inCall.astParent.assignment - .where(_.argument(1).code(tgt.name)) - .argument(2) - .headOption match - // In the case of a constructor, we should get the "new" call - case Some(block: Block) => - block.ast.isCall.or( - _.nameExact(".new"), - _.name(".*__init__.*") - ).lastOption - case x => x - case x => Some(x) - - (tgt, defNode, partitionInvolvementInCalls) match - // Case 1: Generated by variable assignment - case (local: Local, Some(genCall: Call), (invokedCalls, argToCalls)) => - Option( - ( - local.method.head, - ObjectUsageSlice( - targetObj = createDefComponent(local, genCall), - definedBy = Option(createDefComponent(genCall, null)), - invokedCalls = invokedCalls, - argToCalls = argToCalls - ) - ) - ) - // Case 2: Generated by incoming parameter - case (param: MethodParameterIn, _, (invokedCalls, argToCalls)) - if !param.name.matches("(this|self)") => - Option( - ( - param.method, - ObjectUsageSlice( - targetObj = createDefComponent(param, null), - definedBy = Option(createDefComponent(param, null)), - invokedCalls = invokedCalls, - argToCalls = argToCalls - ) - ) - ) - case (m: Method, _, (invokedCalls, argToCalls)) => - var method = m - val defComp = createDefComponent(m, null) - if method.filename == "" && defComp.label == "CALL" && method.callIn.nonEmpty - then - method = method.callIn.head.method - val annotationCalls = m.annotation - .map(a => - ObservedCall( - if a.fullName.nonEmpty then a.fullName else a.name, - if a.code.nonEmpty then Option(a.code) else Option(a.fullName), - List.empty, - "", - Option(m.isExternal), - a.lineNumber.map(_.intValue()), - 
a.columnNumber.map(_.intValue()) - ) - ) - .toList - Option( - method, - ObjectUsageSlice( - targetObj = defComp, - definedBy = Option(defComp), - invokedCalls = invokedCalls ++ annotationCalls, - argToCalls = argToCalls - ) - ) + /** Creates a def component with the workaround of correcting the type full name if it is only a + * type name. + */ + private def createDefComponent(node: AstNode, definedCallNode: AstNode) = + DefComponent.fromNode(node, definedCallNode, typeMap.toMap) + + // TODO: Slicing may run before post-processing so we cannot assume a call graph + // implement this in a next step to combine slices + @unused + private def linkSlices(slices: Map[String, Set[ObjectUsageSlice]]): Unit = + slices.foreach { case (_, usageSlices) => + usageSlices.foreach { slice => + slice.definedBy match + case Some(CallDef(_, _, Some(resolvedMethod), _, _, _, _)) => + slices.get(resolvedMethod) match + case Some(_) => // TODO: Handle match + case None => // No match case _ => - None - end match - end call - - private def partitionInvolvementInCalls - : (List[ObservedCall], List[ObservedCallWithArgPos]) = - val (invokedCalls, argToCalls) = getInCallsForReferencedIdentifiers(tgt) - .sortBy(f => (f.lineNumber, f.columnNumber)) - .flatMap(c => c.argument.find(p => p.code == tgt.name).map(f => (c, f)).headOption) - .map { case (c, arg) => - if arg.argumentName.isDefined then (c, arg, Left(arg.argumentName.get)) - else (c, arg, Right(arg.argumentIndex)) - } - .partition { case (_, _, argIdx) => - argIdx match - case Left(_) => false // receivers/bases are never named - case Right(argIdx) => argIdx == 0 - } - ( - invokedCalls.map(_._1).isCall.flatMap(exprToObservedCall).toList, - argToCalls - .flatMap { case (c: Call, _, argAt: Either[String, Int]) => - exprToObservedCall(c).map(oc => - ObservedCallWithArgPos.fromObservedCall(oc, argAt) - ) + slice.argToCalls + .flatMap { + case ObservedCallWithArgPos( + _, + Some(resolvedMethod), + _, + _, + Left(argName), + _, + _, + _ + ) 
=> + slices.get(resolvedMethod).flatMap { calleeSlices => + calleeSlices.find { s => + s.targetObj match + case p: ParamDef => p.name == argName + case _ => false + } + } + case ObservedCallWithArgPos( + _, + Some(resolvedMethod), + _, + _, + Right(argIdx), + _, + _, + _ + ) => + slices.get(resolvedMethod).flatMap { calleeSlices => + calleeSlices.find { s => + s.targetObj match + case p: ParamDef => p.position == argIdx + case _ => false + } + } + case _ => None + } + .foreach { s => + // todo: Handle slice linking } - ) - end partitionInvolvementInCalls - - /** Will attempt to get the API call from the expression if this is a procedure call. - * - * @param baseCall - * the expression to extract the API call from. - * @return - * an API call if present. - */ - private def exprToObservedCall(baseCall: Call): Option[ObservedCall] = - val language = atom.metaData.language.headOption - val isMemberInvocation = baseCall.name.equals(Operators.fieldAccess) - val isConstructor = - baseCall.name.equals(Operators.alloc) || baseCall.ast.isCall.nameExact( - Operators.alloc - ).nonEmpty - - def getResolvedMethod(x: Call): Option[String] = - if - DefComponent.unresolvedCallPattern.matcher(x.methodFullName).matches() - then None - else Option(x.methodFullName) - - // Handle the case where a call is an invocation of a field member (lambda) or function/method call - var (callName, resolvedMethod): (Option[String], Option[String]) = - if isMemberInvocation then - baseCall.argumentOut - .flatMap { - case x: FieldIdentifier => - Option(Option(x.code) -> None) - case x: Call => Option(Option(x.name) -> getResolvedMethod(x)) - case _ => None - } - .headOption - .getOrElse((None, None)) - else if isConstructor then - val m = constructorTypeMatcher.matcher(baseCall.code) - val typeName = - if m.find() then m.group(1) - else baseCall.code.stripPrefix("new ").takeWhile(!_.equals('(')) - Option(typeName) -> typeMap.get(typeName) - else Option(baseCall.name) -> getResolvedMethod(baseCall) - - 
if callName.isEmpty then return None - - val params = (if isMemberInvocation then baseCall.inCall.argument - else if isConstructor then - baseCall.ast.isCall - .nameExact(".new") - .lastOption - .map(_.argument) - .getOrElse(Iterator.empty) - else baseCall.argument) - .collect { case n: Expression if n.argumentIndex > 0 => n } - .flatMap { - case _: MethodRef => Option("LAMBDA") - case x => - Option( - x.property( - PropertyNames.TYPE_FULL_NAME, - x.property(PropertyNames.DYNAMIC_TYPE_HINT_FULL_NAME, Seq("ANY")).headOption - ) - ) - } - .collect { case x: String => x } - .toList - // Not sure how we can get the return type unless it's typescript or we can resolve the callee? - val returnType = if isConstructor then - resolvedMethod match - case Some(methodFullName) => methodFullName - case None => "ANY" - else - baseCall.argumentOut - .flatMap { - case x: Call - if !DefComponent.unresolvedCallPattern.matcher( - x.methodFullName - ).matches() => - atom.method.fullNameExact( - x.methodFullName - ).methodReturn.typeFullName.headOption - case x: Call => - x.callee(resolver).methodReturn.typeFullName.headOption - case _ => None - } - .headOption - .getOrElse("ANY") - // If resolvedMethod is null then use the code property to construct the method name - if baseCall.code.nonEmpty && baseCall.code.contains( - "(" - ) && language.get == Languages.JSSRC - then - var baseCallCode = baseCall.code.takeWhile(_ != '(') - if baseCallCode.contains(" ") then - baseCallCode = baseCallCode.split(" ").last - // Retain the full code for route detection purposes - if language.get == Languages.JSSRC then - if baseCallCode.startsWith( - "app.use" - ) - then - if baseCall.argument.nonEmpty && baseCall.argument.isLiteral.nonEmpty - then - baseCallCode = - baseCall.argument.isLiteral.filterNot(_.code == "*").head.code - else if baseCallCode.startsWith( - "route" - ) || baseCallCode.startsWith("app") - then - baseCallCode = baseCall.code - .replaceAll("\n", "\\n").replaceAll( - " {4}", - " 
{2}" - ).replaceAll(" {2}", "\\t") - end if - resolvedMethod = Option(baseCallCode) - end if - Option( - ObservedCall( - callName.get, - resolvedMethod, - params, - returnType, - baseCall.callee(resolver).isExternal.headOption, - baseCall.lineNumber.map(_.intValue()), - baseCall.columnNumber.map(_.intValue()) - ) - ) - end exprToObservedCall - - /** Creates a def component with the workaround of correcting the type full name if it is - * only a type name. - */ - private def createDefComponent(node: AstNode, definedCallNode: AstNode) = - DefComponent.fromNode(node, definedCallNode, typeMap.toMap) - - // TODO: Slicing may run before post-processing so we cannot assume a call graph - // implement this in a next step to combine slices - @unused - private def linkSlices(slices: Map[String, Set[ObjectUsageSlice]]): Unit = - slices.foreach { case (_, usageSlices) => - usageSlices.foreach { slice => - slice.definedBy match - case Some(CallDef(_, _, Some(resolvedMethod), _, _, _, _)) => - slices.get(resolvedMethod) match - case Some(_) => // TODO: Handle match - case None => // No match - case _ => - slice.argToCalls - .flatMap { - case ObservedCallWithArgPos( - _, - Some(resolvedMethod), - _, - _, - Left(argName), - _, - _, - _ - ) => - slices.get(resolvedMethod).flatMap { calleeSlices => - calleeSlices.find { s => - s.targetObj match - case p: ParamDef => p.name == argName - case _ => false - } - } - case ObservedCallWithArgPos( - _, - Some(resolvedMethod), - _, - _, - Right(argIdx), - _, - _, - _ - ) => - slices.get(resolvedMethod).flatMap { calleeSlices => - calleeSlices.find { s => - s.targetObj match - case p: ParamDef => p.position == argIdx - case _ => false - } - } - case _ => None - } - .foreach { s => - // todo: Handle slice linking - } - } } - end TrackUsageTask + } + end TrackUsageTask - /** Adds extensions to extract all assignments from method bodies. 
- */ - implicit class MethodDataSourceExt(trav: Iterator[Method]): - def declaration: Iterator[Declaration] = trav.ast.collectAll[Declaration] + /** Adds extensions to extract all assignments from method bodies. + */ + implicit class MethodDataSourceExt(trav: Iterator[Method]): + def declaration: Iterator[Declaration] = trav.ast.collectAll[Declaration] end UsageSlicing diff --git a/src/main/scala/io/appthreat/atom/slicing/package.scala b/src/main/scala/io/appthreat/atom/slicing/package.scala index 3c40e2a..45d66ca 100644 --- a/src/main/scala/io/appthreat/atom/slicing/package.scala +++ b/src/main/scala/io/appthreat/atom/slicing/package.scala @@ -12,744 +12,744 @@ import scala.collection.concurrent.TrieMap package object slicing: - import cats.syntax.functor.* - import io.circe.generic.auto.* - import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder} - import io.circe.syntax.EncoderOps + import cats.syntax.functor.* + import io.circe.generic.auto.* + import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder} + import io.circe.syntax.EncoderOps - trait BaseConfig: + trait BaseConfig: - var inputPath: File = File("app.atom") + var inputPath: File = File("app.atom") - var outputSliceFile: File = File("slices") + var outputSliceFile: File = File("slices") - private var dummyTypesEnabled: Boolean = false + private var dummyTypesEnabled: Boolean = false - var fileFilter: Option[String] = None + var fileFilter: Option[String] = None - var methodNameFilter: Option[String] = None + var methodNameFilter: Option[String] = None - var methodParamTypeFilter: Option[String] = None + var methodParamTypeFilter: Option[String] = None - var methodAnnotationFilter: Option[String] = None + var methodAnnotationFilter: Option[String] = None - def withInputPath(x: File): BaseConfig = - this.inputPath = x - this + def withInputPath(x: File): BaseConfig = + this.inputPath = x + this - def withOutputSliceFile(x: File): BaseConfig = - this.outputSliceFile = x - this + def 
withOutputSliceFile(x: File): BaseConfig = + this.outputSliceFile = x + this - def withDummyTypesEnabled(x: Boolean): BaseConfig = - this.dummyTypesEnabled = x - this + def withDummyTypesEnabled(x: Boolean): BaseConfig = + this.dummyTypesEnabled = x + this - def withFileFilter(x: Option[String]): BaseConfig = - this.fileFilter = x - this - - def withMethodNameFilter(x: Option[String]): BaseConfig = - this.methodNameFilter = x - this - - def withMethodParamTypeFilter(x: Option[String]): BaseConfig = - this.methodParamTypeFilter = x - this - - def withMethodAnnotationFilter(x: Option[String]): BaseConfig = - this.methodParamTypeFilter = x - this - end BaseConfig - - case class DefaultSliceConfig() extends BaseConfig - - case class DataFlowConfig( - sinkPatternFilter: Option[String] = None, - mustEndAtExternalMethod: Boolean = true, - excludeOperatorCalls: Boolean = true, - sliceDepth: Int = 7, - sliceNodesLimit: Int = 200 - ) extends BaseConfig - - case class UsagesConfig( - minNumCalls: Int = 1, - excludeOperatorCalls: Boolean = true, - excludeMethodSource: Boolean = true, - extractEndpoints: Boolean = false - ) extends BaseConfig - - case class ReachablesConfig( - sourceTag: String, - sinkTag: String, - sliceDepth: Int, - includeCryptoFlows: Boolean - ) extends BaseConfig - - /** Adds extensions to modify a method traversal based on config options - */ - implicit class MethodFilterExt(trav: Iterator[Method]): - - def withMethodNameFilter(implicit config: BaseConfig): Iterator[Method] = - config.methodNameFilter match - case Some(filter) => trav.name(filter) - case None => trav - - def withMethodParameterFilter(implicit config: BaseConfig): Iterator[Method] = - config.methodParamTypeFilter match - case Some(filter) => trav.where(_.parameter.evalType(filter)) - case None => trav - - def withMethodAnnotationFilter(implicit config: BaseConfig): Iterator[Method] = - config.methodAnnotationFilter match - case Some(filter) => trav.where(_.annotation.code(filter)) - case 
None => trav - - /** A trait for all objects that represent a 1:1 relationship between the CPG and all the slices - * extracted. - */ - sealed trait ProgramSlice: - - def toJson: String - - def toJsonPretty: String - - /** A data-flow slice vector for a given backwards intraprocedural path. + def withFileFilter(x: Option[String]): BaseConfig = + this.fileFilter = x + this + + def withMethodNameFilter(x: Option[String]): BaseConfig = + this.methodNameFilter = x + this + + def withMethodParamTypeFilter(x: Option[String]): BaseConfig = + this.methodParamTypeFilter = x + this + + def withMethodAnnotationFilter(x: Option[String]): BaseConfig = + this.methodAnnotationFilter = x + this + end BaseConfig + + case class DefaultSliceConfig() extends BaseConfig + + case class DataFlowConfig( + sinkPatternFilter: Option[String] = None, + mustEndAtExternalMethod: Boolean = true, + excludeOperatorCalls: Boolean = true, + sliceDepth: Int = 7, + sliceNodesLimit: Int = 200 + ) extends BaseConfig + + case class UsagesConfig( + minNumCalls: Int = 1, + excludeOperatorCalls: Boolean = true, + excludeMethodSource: Boolean = true, + extractEndpoints: Boolean = false + ) extends BaseConfig + + case class ReachablesConfig( + sourceTag: String, + sinkTag: String, + sliceDepth: Int, + includeCryptoFlows: Boolean + ) extends BaseConfig + + /** Adds extensions to modify a method traversal based on config options + */ + implicit class MethodFilterExt(trav: Iterator[Method]): + + def withMethodNameFilter(implicit config: BaseConfig): Iterator[Method] = + config.methodNameFilter match + case Some(filter) => trav.name(filter) + case None => trav + + def withMethodParameterFilter(implicit config: BaseConfig): Iterator[Method] = + config.methodParamTypeFilter match + case Some(filter) => trav.where(_.parameter.evalType(filter)) + case None => trav + + def withMethodAnnotationFilter(implicit config: BaseConfig): Iterator[Method] = + config.methodAnnotationFilter match + case Some(filter) =>
trav.where(_.annotation.code(filter)) + case None => trav + + /** A trait for all objects that represent a 1:1 relationship between the CPG and all the slices + * extracted. + */ + sealed trait ProgramSlice: + + def toJson: String + + def toJsonPretty: String + + /** A data-flow slice vector for a given backwards intraprocedural path. + * + * @param nodes + * the nodes in the slice. + * @param edges + * a map linking nodes with their edges. + */ + case class DataFlowSlice(nodes: Set[SliceNode], edges: Set[SliceEdge]) extends ProgramSlice: + def toJson: String = this.asJson.noSpaces + + def toJsonPretty: String = this.asJson.spaces2 + + case class ReachableSlice(reachables: List[ReachableFlows]) extends ProgramSlice: + def toJson: String = this.asJson.noSpaces + + def toJsonPretty: String = this.asJson.spaces2 + + implicit val encodeDataFlowSlice: Encoder[DataFlowSlice] = + Encoder.instance { case DataFlowSlice(nodes, edges) => + Json.obj("nodes" -> nodes.asJson, "edges" -> edges.asJson) + } + + case class ReachableFlows(flows: List[SliceNode], purls: Set[String]) + + case class SliceNode( + id: Long, + label: String, + name: String = "", + fullName: String = "", + signature: String = "", + isExternal: Boolean = false, + code: String, + typeFullName: String = "", + parentMethodName: String = "", + parentMethodSignature: String = "", + parentFileName: String = "", + parentPackageName: String = "", + parentClassName: String = "", + lineNumber: Option[Integer] = None, + columnNumber: Option[Integer] = None, + tags: String = "" + ) + + implicit val encodeSliceNode: Encoder[SliceNode] = Encoder.instance { + case SliceNode( + id, + label, + name, + fullName, + signature, + isExternal, + code, + typeFullName, + parentMethodName, + parentMethodSignature, + parentFileName, + parentPackageName, + parentClassName, + lineNumber, + columnNumber, + tags + ) => + Json.obj( + "id" -> id.asJson, + "label" -> label.asJson, + "name" -> name.asJson, + "fullName" -> fullName.asJson, + 
"signature" -> signature.asJson, + "isExternal" -> isExternal.asJson, + "code" -> code.asJson, + "typeFullName" -> typeFullName.asJson, + "parentMethodName" -> parentMethodName.asJson, + "parentMethodSignature" -> parentMethodSignature.asJson, + "parentFileName" -> parentFileName.asJson, + "parentPackageName" -> parentPackageName.asJson, + "parentClassName" -> parentClassName.asJson, + "lineNumber" -> lineNumber.asJson, + "columnNumber" -> columnNumber.asJson, + "tags" -> tags.asJson + ) + } + + case class SliceEdge(src: Long, dst: Long, label: String) + + implicit val encodeSliceEdge: Encoder[SliceEdge] = + Encoder.instance { case SliceEdge(src, dst, label) => + Json.obj("src" -> src.asJson, "dst" -> dst.asJson, "label" -> label.asJson) + } + + /** A usage slice of an object at the start of its definition until its final usage. + * + * @param targetObj + * the name and type of the focus object. + * @param definedBy + * the name of the call, identifier, or literal that defined the target object, if available. + * @param invokedCalls + * calls this object is observed to call. + * @param argToCalls + * the calls this object is observed to be an argument of. 
+ */ + case class ObjectUsageSlice( + targetObj: DefComponent, + definedBy: Option[DefComponent], + invokedCalls: List[ObservedCall], + argToCalls: List[ObservedCallWithArgPos] + ): + override def toString: String = + s"{tgt: $targetObj${definedBy.map(p => s" = $p").getOrElse("")}, " + + s"inv: [${invokedCalls.mkString(",")}], " + + s"argsTo: [${argToCalls.mkString(",")}]" + + s"}" + + implicit val decodeObjectUsageSlice: Decoder[ObjectUsageSlice] = + (c: HCursor) => + for + x <- c.downField("targetObj").as[DefComponent] + p <- c.downField("definedBy").as[Option[DefComponent]] + r <- c.downField("invokedCalls").as[List[ObservedCall]] + a <- c.downField("argToCalls").as[List[ObservedCallWithArgPos]] + yield ObjectUsageSlice(x, p, r, a) + implicit val encodeObjectUsageSlice: Encoder[ObjectUsageSlice] = + Encoder.instance { case ObjectUsageSlice(c, p, r, a) => + Json.obj( + "targetObj" -> c.asJson, + "definedBy" -> p.asJson, + "invokedCalls" -> r.asJson, + "argToCalls" -> a.asJson + ) + } + + /** Packages the object usage slices along with the method source code. + * + * @param code + * raw source code. + * @param fullName + * method full name. + * @param fileName + * the file name. + * @param slices + * the object usage slices. 
+ */ + case class MethodUsageSlice( + code: String, + fullName: String, + signature: String, + fileName: String, + slices: Set[ObjectUsageSlice], + lineNumber: Option[Int] = None, + columnNumber: Option[Int] = None + ) + + implicit val decodeMethodUsageSlice: Decoder[MethodUsageSlice] = + (c: HCursor) => + for + code <- c.downField("code").as[String] + fn <- c.downField("fullName").as[String] + signature <- c.downField("signature").as[String] + fln <- c.downField("fileName").as[String] + ss <- c.downField("slices").as[Set[ObjectUsageSlice]] + lin <- c.downField("lineNumber").as[Option[Int]] + col <- c.downField("columnNumber").as[Option[Int]] + yield MethodUsageSlice(code, fn, signature, fln, ss, lin, col) + implicit val encodeMethodUsageSlice: Encoder[MethodUsageSlice] = + Encoder.instance { case MethodUsageSlice(a, b, signature, c, d, e, f) => + Json.obj( + "code" -> a.asJson, + "fullName" -> b.asJson, + "signature" -> signature.asJson, + "fileName" -> c.asJson, + "lineNumber" -> e.asJson, + "columnNumber" -> f.asJson, + "usages" -> d.asJson + ) + } + + /** Represents a source of data-generation, i.e., where data is defined and can be assigned to + * some variable or used in an argument. + */ + sealed trait DefComponent: + def name: String + + def typeFullName: String + + def label: String + + def lineNumber: Option[Int] + + def columnNumber: Option[Int] + + override def toString: String = + s"[$label] $name" + (if typeFullName.nonEmpty then s": $typeFullName" else "") + + /** Represents a local transfer of data via aliasing. The data defined is via some alias. + */ + case class LocalDef( + name: String, + typeFullName: String, + lineNumber: Option[Int] = None, + columnNumber: Option[Int] = None, + label: String = "LOCAL" + ) extends DefComponent + + implicit val localDefDecoder: Decoder[LocalDef] = deriveDecoder[LocalDef] + implicit val localDefEncoder: Encoder[LocalDef] = deriveEncoder[LocalDef] + + /** Represents a literal. 
+ */ + case class LiteralDef( + name: String, + typeFullName: String, + lineNumber: Option[Int] = None, + columnNumber: Option[Int] = None, + label: String = "LITERAL" + ) extends DefComponent + + implicit val literalDefDecoder: Decoder[LiteralDef] = deriveDecoder[LiteralDef] + implicit val literalDefEncoder: Encoder[LiteralDef] = deriveEncoder[LiteralDef] + + /** Represents data introduced via a parameter. + * + * @param position + * the index of the parameter. + */ + case class ParamDef( + name: String, + typeFullName: String, + position: Integer, + lineNumber: Option[Int] = None, + columnNumber: Option[Int] = None, + label: String = "PARAM" + ) extends DefComponent: + override def toString: String = super.toString + s" @ pos #$position" + + implicit val paramDefDecoder: Decoder[ParamDef] = deriveDecoder[ParamDef] + implicit val paramDefEncoder: Encoder[ParamDef] = deriveEncoder[ParamDef] + + /** Represents data introduced by the return value of a call. + * + * @param resolvedMethod + * the full method path if resolved. + */ + case class CallDef( + name: String, + typeFullName: String, + resolvedMethod: Option[String] = None, + isExternal: Option[Boolean], + lineNumber: Option[Int] = None, + columnNumber: Option[Int] = None, + label: String = "CALL" + ) extends DefComponent: + override def toString: String = + super.toString + resolvedMethod.map(s => s" @ $s").getOrElse("") + + implicit val callDefDecoder: Decoder[CallDef] = deriveDecoder[CallDef] + implicit val callDefEncoder: Encoder[CallDef] = deriveEncoder[CallDef] + + /** Represents data introduced by an unhandled data structure.
+ */ + case class UnknownDef( + name: String, + typeFullName: String, + lineNumber: Option[Int] = None, + columnNumber: Option[Int] = None, + label: String = "UNKNOWN" + ) extends DefComponent + + implicit val unknownDefDecoder: Decoder[UnknownDef] = deriveDecoder[UnknownDef] + implicit val unknownDefEncoder: Encoder[UnknownDef] = deriveEncoder[UnknownDef] + + // The following encoders make sure the object does not follow ClassName: { properties ... } format but instead + // is just { properties }. This makes it less automatically serializable but we have `label` to encode classes. + + implicit val encodeDefComponent: Encoder[DefComponent] = Encoder.instance { + case local @ LocalDef(_, _, _, _, _) => local.asJson + case literal @ LiteralDef(_, _, _, _, _) => literal.asJson + case call @ CallDef(_, _, _, _, _, _, _) => call.asJson + case param @ ParamDef(_, _, _, _, _, _) => param.asJson + case unknown @ UnknownDef(_, _, _, _, _) => unknown.asJson + } + + implicit val decodeDefComponent: Decoder[DefComponent] = + List[Decoder[DefComponent]]( + Decoder[LocalDef].widen, + Decoder[LiteralDef].widen, + Decoder[CallDef].widen, + Decoder[ParamDef].widen, + Decoder[UnknownDef].widen + ).reduceLeft(_.or(_)) + + object DefComponent: + + val unresolvedCallPattern: Pattern = Pattern.compile("^( - Json.obj("nodes" -> nodes.asJson, "edges" -> edges.asJson) - } - - case class ReachableFlows(flows: List[SliceNode], purls: Set[String]) - - case class SliceNode( - id: Long, - label: String, - name: String = "", - fullName: String = "", - signature: String = "", - isExternal: Boolean = false, - code: String, - typeFullName: String = "", - parentMethodName: String = "", - parentMethodSignature: String = "", - parentFileName: String = "", - parentPackageName: String = "", - parentClassName: String = "", - lineNumber: Option[Integer] = None, - columnNumber: Option[Integer] = None, - tags: String = "" - ) - - implicit val encodeSliceNode: Encoder[SliceNode] = Encoder.instance { - case
SliceNode( - id, - label, - name, - fullName, - signature, + def fromNode( + node: StoredNode, + definedCallNode: StoredNode, + typeMap: Map[String, String] = Map.empty[String, String] + ): DefComponent = + var nodeType = (node.property(PropertyNames.TYPE_FULL_NAME, "ANY") +: node.property( + PropertyNames.DYNAMIC_TYPE_HINT_FULL_NAME, + Seq.empty[String] + )).filterNot(_.matches("(ANY|UNKNOWN)")).headOption.getOrElse("ANY") + if nodeType == "ANY" && definedCallNode.nonEmpty then + definedCallNode match + case x: Call => + val callName = x.code.takeWhile(_ != '(') + if callName == "require" || x.methodFullName == ".fieldAccess" + then + nodeType = x.argument.last.code.replace("\"", "") + case _ => + nodeType = "ANY" + val typeFullName = typeMap.getOrElse(nodeType, nodeType) + val lineNumber = Option( + node.property(new PropertyKey[Integer](PropertyNames.LINE_NUMBER)) + ).map(_.toInt) + val columnNumber = Option( + node.property(new PropertyKey[Integer](PropertyNames.COLUMN_NUMBER)) + ).map(_.toInt) + val isExternal = + Option(node.property(new PropertyKey[Boolean](PropertyNames.IS_EXTERNAL))) + node match + case x: MethodParameterIn => + ParamDef(x.name, typeFullName, x.index, lineNumber, columnNumber) + case x: Call if x.code.startsWith("new ") => + val typeName = x.code.stripPrefix("new ").takeWhile(!_.equals('(')) + CallDef( + x.code.takeWhile(_ != '('), + typeMap.getOrElse(typeName, x.typeFullName), + typeMap.get(typeName), isExternal, - code, - typeFullName, - parentMethodName, - parentMethodSignature, - parentFileName, - parentPackageName, - parentClassName, lineNumber, - columnNumber, - tags - ) => - Json.obj( - "id" -> id.asJson, - "label" -> label.asJson, - "name" -> name.asJson, - "fullName" -> fullName.asJson, - "signature" -> signature.asJson, - "isExternal" -> isExternal.asJson, - "code" -> code.asJson, - "typeFullName" -> typeFullName.asJson, - "parentMethodName" -> parentMethodName.asJson, - "parentMethodSignature" -> parentMethodSignature.asJson, - 
"parentFileName" -> parentFileName.asJson, - "parentPackageName" -> parentPackageName.asJson, - "parentClassName" -> parentClassName.asJson, - "lineNumber" -> lineNumber.asJson, - "columnNumber" -> columnNumber.asJson, - "tags" -> tags.asJson + columnNumber ) - } - - case class SliceEdge(src: Long, dst: Long, label: String) - - implicit val encodeSliceEdge: Encoder[SliceEdge] = - Encoder.instance { case SliceEdge(src, dst, label) => - Json.obj("src" -> src.asJson, "dst" -> dst.asJson, "label" -> label.asJson) - } - - /** A usage slice of an object at the start of its definition until its final usage. - * - * @param targetObj - * the name and type of the focus object. - * @param definedBy - * the name of the call, identifier, or literal that defined the target object, if available. - * @param invokedCalls - * calls this object is observed to call. - * @param argToCalls - * the calls this object is observed to be an argument of. - */ - case class ObjectUsageSlice( - targetObj: DefComponent, - definedBy: Option[DefComponent], - invokedCalls: List[ObservedCall], - argToCalls: List[ObservedCallWithArgPos] - ): - override def toString: String = - s"{tgt: $targetObj${definedBy.map(p => s" = $p").getOrElse("")}, " + - s"inv: [${invokedCalls.mkString(",")}], " + - s"argsTo: [${argToCalls.mkString(",")}]" + - s"}" - - implicit val decodeObjectUsageSlice: Decoder[ObjectUsageSlice] = - (c: HCursor) => - for - x <- c.downField("targetObj").as[DefComponent] - p <- c.downField("definedBy").as[Option[DefComponent]] - r <- c.downField("invokedCalls").as[List[ObservedCall]] - a <- c.downField("argToCalls").as[List[ObservedCallWithArgPos]] - yield ObjectUsageSlice(x, p, r, a) - implicit val encodeObjectUsageSlice: Encoder[ObjectUsageSlice] = - Encoder.instance { case ObjectUsageSlice(c, p, r, a) => - Json.obj( - "targetObj" -> c.asJson, - "definedBy" -> p.asJson, - "invokedCalls" -> r.asJson, - "argToCalls" -> a.asJson + case x: Call if 
unresolvedCallPattern.matcher(x.methodFullName).matches() => + val callName = x.code.takeWhile(_ != '(') + CallDef( + callName, + typeFullName, + Option(callName), + isExternal, + lineNumber, + columnNumber ) - } - - /** Packages the object usage slices along with the method source code. - * - * @param code - * raw source code. - * @param fullName - * method full name. - * @param fileName - * the file name. - * @param slices - * the object usage slices. - */ - case class MethodUsageSlice( - code: String, - fullName: String, - signature: String, - fileName: String, - slices: Set[ObjectUsageSlice], - lineNumber: Option[Int] = None, - columnNumber: Option[Int] = None - ) - - implicit val decodeMethodUsageSlice: Decoder[MethodUsageSlice] = - (c: HCursor) => - for - code <- c.downField("code").as[String] - fn <- c.downField("fullName").as[String] - signature <- c.downField("signature").as[String] - fln <- c.downField("fileName").as[String] - ss <- c.downField("slices").as[Set[ObjectUsageSlice]] - lin <- c.downField("lineNumber").as[Option[Int]] - col <- c.downField("columnNumber").as[Option[Int]] - yield MethodUsageSlice(code, fn, signature, fln, ss, lin, col) - implicit val encodeMethodUsageSlice: Encoder[MethodUsageSlice] = - Encoder.instance { case MethodUsageSlice(a, b, signature, c, d, e, f) => - Json.obj( - "code" -> a.asJson, - "fullName" -> b.asJson, - "signature" -> signature.asJson, - "fileName" -> c.asJson, - "lineNumber" -> e.asJson, - "columnNumber" -> f.asJson, - "usages" -> d.asJson + case x: Call => + val callName = x.code.takeWhile(_ != '(') + var resolvedMethod = Option(x.methodFullName) + if callName == "require" && resolvedMethod.get == ".fieldAccess" then + resolvedMethod = Option(x.code) + CallDef( + callName, + typeFullName, + resolvedMethod, + isExternal, + lineNumber, + columnNumber ) - } - - /** Represents a source of data-generation, i.e., where data is defined and can be assigned to - * some variable or used in an argument. 
- */ - sealed trait DefComponent: - def name: String - - def typeFullName: String - - def label: String - - def lineNumber: Option[Int] - - def columnNumber: Option[Int] - - override def toString: String = - s"[$label] $name" + (if typeFullName.nonEmpty then s": $typeFullName" else "") - - /** Represents a local transfer of data via aliasing. The data defined is via some alias. - */ - case class LocalDef( - name: String, - typeFullName: String, - lineNumber: Option[Int] = None, - columnNumber: Option[Int] = None, - label: String = "LOCAL" - ) extends DefComponent - - implicit val localDefDecoder: Decoder[LocalDef] = deriveDecoder[LocalDef] - implicit val localDefEncoder: Encoder[LocalDef] = deriveEncoder[LocalDef] - - /** Represents a literal. - */ - case class LiteralDef( - name: String, - typeFullName: String, - lineNumber: Option[Int] = None, - columnNumber: Option[Int] = None, - label: String = "LITERAL" - ) extends DefComponent - - implicit val literalDefDecoder: Decoder[LiteralDef] = deriveDecoder[LiteralDef] - implicit val literalDefEncoder: Encoder[LiteralDef] = deriveEncoder[LiteralDef] - - /** Represents data introduced via a parameter. - * - * @param position - * the index of the parameter. - */ - case class ParamDef( - name: String, - typeFullName: String, - position: Integer, - lineNumber: Option[Int] = None, - columnNumber: Option[Int] = None, - label: String = "PARAM" - ) extends DefComponent: - override def toString: String = super.toString + s" @ pos #$position" - - implicit val paramDefDecoder: Decoder[ParamDef] = deriveDecoder[ParamDef] - implicit val paramDefEncoder: Encoder[ParamDef] = deriveEncoder[ParamDef] - - /** Represents data introduced by the return value of a call. - * - * @param resolvedMethod - * the full method path if resolved. 
- */ - case class CallDef( - name: String, - typeFullName: String, - resolvedMethod: Option[String] = None, - isExternal: Option[Boolean], - lineNumber: Option[Int] = None, - columnNumber: Option[Int] = None, - label: String = "CALL" - ) extends DefComponent: - override def toString: String = - super.toString + resolvedMethod.map(s => s" @ $s").getOrElse("") - - implicit val callDefDecoder: Decoder[CallDef] = deriveDecoder[CallDef] - implicit val callDefEncoder: Encoder[CallDef] = deriveEncoder[CallDef] - - /** Representds data introduced by an unhandled data structure. - */ - case class UnknownDef( - name: String, - typeFullName: String, - lineNumber: Option[Int] = None, - columnNumber: Option[Int] = None, - label: String = "UNKNOWN" - ) extends DefComponent - - implicit val unknownDefDecoder: Decoder[UnknownDef] = deriveDecoder[UnknownDef] - implicit val unknownDefEncoder: Encoder[UnknownDef] = deriveEncoder[UnknownDef] - - // The following encoders make sure the object does follow ClassName: { properties ... } format but instead - // is just { properties }. This makes it less automatically serializable but we have `label` to encode classes. 
- - implicit val encodeDefComponent: Encoder[DefComponent] = Encoder.instance { - case local @ LocalDef(_, _, _, _, _) => local.asJson - case literal @ LiteralDef(_, _, _, _, _) => literal.asJson - case call @ CallDef(_, _, _, _, _, _, _) => call.asJson - case param @ ParamDef(_, _, _, _, _, _) => param.asJson - case unknown @ UnknownDef(_, _, _, _, _) => unknown.asJson - } - - implicit val decodeDefComponent: Decoder[DefComponent] = - List[Decoder[DefComponent]]( - Decoder[LocalDef].widen, - Decoder[LiteralDef].widen, - Decoder[CallDef].widen, - Decoder[ParamDef].widen, - Decoder[UnknownDef].widen - ).reduceLeft(_.or(_)) - - object DefComponent: - - val unresolvedCallPattern: Pattern = Pattern.compile("^( - val callName = x.code.takeWhile(_ != '(') - if callName == "require" || x.methodFullName == ".fieldAccess" - then - nodeType = x.argument.last.code.replace("\"", "") - case _ => - nodeType = "ANY" - val typeFullName = typeMap.getOrElse(nodeType, nodeType) - val lineNumber = Option( - node.property(new PropertyKey[Integer](PropertyNames.LINE_NUMBER)) - ).map(_.toInt) - val columnNumber = Option( - node.property(new PropertyKey[Integer](PropertyNames.COLUMN_NUMBER)) - ).map(_.toInt) - val isExternal = - Option(node.property(new PropertyKey[Boolean](PropertyNames.IS_EXTERNAL))) - node match - case x: MethodParameterIn => - ParamDef(x.name, typeFullName, x.index, lineNumber, columnNumber) - case x: Call if x.code.startsWith("new ") => - val typeName = x.code.stripPrefix("new ").takeWhile(!_.equals('(')) - CallDef( - x.code.takeWhile(_ != '('), - typeMap.getOrElse(typeName, x.typeFullName), - typeMap.get(typeName), - isExternal, - lineNumber, - columnNumber - ) - case x: Call if unresolvedCallPattern.matcher(x.methodFullName).matches() => - val callName = x.code.takeWhile(_ != '(') - CallDef( - callName, - typeFullName, - Option(callName), - isExternal, - lineNumber, - columnNumber - ) - case x: Call => - val callName = x.code.takeWhile(_ != '(') - var 
resolvedMethod = Option(x.methodFullName) - if callName == "require" && resolvedMethod.get == ".fieldAccess" then - resolvedMethod = Option(x.code) - CallDef( - callName, - typeFullName, - resolvedMethod, - isExternal, - lineNumber, - columnNumber - ) - case x: Identifier => LocalDef(x.name, typeFullName, lineNumber, columnNumber) - case x: Local => LocalDef(x.name, typeFullName, lineNumber, columnNumber) - case x: Literal => LiteralDef(x.code, typeFullName, lineNumber, columnNumber) - case x: Member => LocalDef(x.name, typeFullName, lineNumber, columnNumber) - case x: Method if x.callIn.nonEmpty => - val lastCall = x.callIn.last - CallDef( - lastCall.name, - lastCall.typeFullName, - Option(x.fullName), - isExternal, - lastCall.lineNumber.map(_.intValue()), - lastCall.columnNumber.map(_.intValue()) - ) - case x: Method if x.annotation.nonEmpty => - val annotation = x.annotation.last - CallDef( - annotation.name, - annotation.fullName, - Option(annotation.code), - isExternal, - annotation.lineNumber.map(_.intValue()), - annotation.columnNumber.map(_.intValue()), - label = annotation.label - ) - case x: AstNode => - var methodDecl = x.code.takeWhile(_ != ')') - if methodDecl.contains("(") && !methodDecl.endsWith(")") then - methodDecl = methodDecl + ")" - if (methodDecl.contains("\n") || methodDecl.contains( - " " - )) && methodDecl.contains("(") - then - methodDecl = methodDecl.takeWhile(_ != '(') - methodDecl = methodDecl - .replaceAll("\n", "") - .replaceAll("\t", " ") - .replaceAll("\\n", "") - .replaceAll("\\t\\t", " ") - .replaceAll("\\s+", " ") - unknownMethodDeclCache.getOrElseUpdate( - s"$methodDecl|$lineNumber|$columnNumber", - UnknownDef(methodDecl, typeFullName, lineNumber, columnNumber) - ) - end match - end fromNode - end DefComponent - - /** Call details in the usage slice. - * - * @param callName - * the name of the call. - * @param resolvedMethod - * the method full name if the call is resolved. - * @param paramTypes - * the observed parameter types. 
- * @param returnType - * the observed return type. - */ - sealed abstract class UsedCall( - callName: String, - resolvedMethod: Option[String], - paramTypes: List[String], - returnType: String, - isExternal: Option[Boolean], - lineNumber: Option[Int] = None, - columnNumber: Option[Int] = None - ): - override def toString: String = - s"$callName(${paramTypes.mkString(",")}):$returnType" - - /** Details related to an observed call. - */ - case class ObservedCall( - callName: String, - resolvedMethod: Option[String], - paramTypes: List[String], - returnType: String, - isExternal: Option[Boolean], - lineNumber: Option[Int] = None, - columnNumber: Option[Int] = None - ) extends UsedCall( - callName, - resolvedMethod, - paramTypes, - returnType, - isExternal, - lineNumber, - columnNumber - ) - - implicit val decodeObservedCall: Decoder[ObservedCall] = - (c: HCursor) => - for - x <- c.downField("callName").as[String] - m <- c.downField("resolvedMethod").as[Option[String]] - p <- c.downField("paramTypes").as[List[String]] - r <- c.downField("returnType").as[String] - ex <- c.downField("isExternal").as[Option[Boolean]] - lin <- c.downField("lineNumber").as[Option[Int]] - col <- c.downField("columnNumber").as[Option[Int]] - yield ObservedCall(x, m, p, r, ex, lin, col) - implicit val encodeObservedCall: Encoder[ObservedCall] = - Encoder.instance { case ObservedCall(c, m, p, r, ex, lin, col) => - Json.obj( - "callName" -> c.asJson, - "resolvedMethod" -> m.asJson, - "paramTypes" -> p.asJson, - "returnType" -> r.asJson, - "isExternal" -> ex.asJson, - "lineNumber" -> lin.asJson, - "columnNumber" -> col.asJson + case x: Identifier => LocalDef(x.name, typeFullName, lineNumber, columnNumber) + case x: Local => LocalDef(x.name, typeFullName, lineNumber, columnNumber) + case x: Literal => LiteralDef(x.code, typeFullName, lineNumber, columnNumber) + case x: Member => LocalDef(x.name, typeFullName, lineNumber, columnNumber) + case x: Method if x.callIn.nonEmpty => + val lastCall = 
x.callIn.last + CallDef( + lastCall.name, + lastCall.typeFullName, + Option(x.fullName), + isExternal, + lastCall.lineNumber.map(_.intValue()), + lastCall.columnNumber.map(_.intValue()) ) - } - - /** Extends observed call with a specific argument in mind. - * - * @param position - * adds the argument position as either a named argument or positional argument. - */ - case class ObservedCallWithArgPos( - callName: String, - resolvedMethod: Option[String], - paramTypes: List[String], - returnType: String, - position: Either[String, Int], - isExternal: Option[Boolean], - lineNumber: Option[Int] = None, - columnNumber: Option[Int] = None - ) extends UsedCall( - callName, - resolvedMethod, - paramTypes, - returnType, - isExternal, - lineNumber, - columnNumber - ): - override def toString: String = super.toString + " @ " + (position match - case Left(namedArg) => namedArg - case Right(argIdx) => argIdx - ) - end ObservedCallWithArgPos - - object ObservedCallWithArgPos: - def fromObservedCall(oc: ObservedCall, pos: Either[String, Int]): ObservedCallWithArgPos = - ObservedCallWithArgPos( - oc.callName, - oc.resolvedMethod, - oc.paramTypes, - oc.returnType, - pos, - oc.isExternal, - oc.lineNumber, - oc.columnNumber + case x: Method if x.annotation.nonEmpty => + val annotation = x.annotation.last + CallDef( + annotation.name, + annotation.fullName, + Option(annotation.code), + isExternal, + annotation.lineNumber.map(_.intValue()), + annotation.columnNumber.map(_.intValue()), + label = annotation.label ) - - implicit val decodeObservedCallWithArgPos: Decoder[ObservedCallWithArgPos] = - (c: HCursor) => - for - x <- c.downField("callName").as[String] - m <- c.downField("resolvedMethod").as[Option[String]] - p <- c.downField("paramTypes").as[List[String]] - r <- c.downField("returnType").as[String] - ex <- c.downField("isExternal").as[Option[Boolean]] - lin <- c.downField("lineNumber").as[Option[Int]] - col <- c.downField("columnNumber").as[Option[Int]] - yield - val pos = 
c.downField("position").as[Int] match - case Left(_) => - c.downField("position").as[String] match - case Left(err) => - throw new RuntimeException( - "Unable to decode `position` as the field is neither a string nor an integer", - err - ) - case Right(argName) => Left(argName) - case Right(argIdx) => Right(argIdx) - ObservedCallWithArgPos(x, m, p, r, pos, ex, lin, col) - implicit val encodeObservedCallWithArgPos: Encoder[ObservedCallWithArgPos] = - Encoder.instance { case ObservedCallWithArgPos(c, m, p, r, a, ex, lin, col) => - Json.obj( - "callName" -> c.asJson, - "resolvedMethod" -> m.asJson, - "paramTypes" -> p.asJson, - "returnType" -> r.asJson, - "position" -> (a match - case Left(argName) => argName.asJson - case Right(argIdx) => argIdx.asJson - ), - "isExternal" -> ex.asJson, - "lineNumber" -> lin.asJson, - "columnNumber" -> col.asJson + case x: AstNode => + var methodDecl = x.code.takeWhile(_ != ')') + if methodDecl.contains("(") && !methodDecl.endsWith(")") then + methodDecl = methodDecl + ")" + if (methodDecl.contains("\n") || methodDecl.contains( + " " + )) && methodDecl.contains("(") + then + methodDecl = methodDecl.takeWhile(_ != '(') + methodDecl = methodDecl + .replaceAll("\n", "") + .replaceAll("\t", " ") + .replaceAll("\\n", "") + .replaceAll("\\t\\t", " ") + .replaceAll("\\s+", " ") + unknownMethodDeclCache.getOrElseUpdate( + s"$methodDecl|$lineNumber|$columnNumber", + UnknownDef(methodDecl, typeFullName, lineNumber, columnNumber) ) - } - - implicit val encodeUsedCall: Encoder[UsedCall] = Encoder.instance { - case oc @ ObservedCall(_, _, _, _, _, _, _) => oc.asJson - case oca @ ObservedCallWithArgPos(_, _, _, _, _, _, _, _) => oca.asJson - } - - implicit val decodeUsedCall: Decoder[UsedCall] = - List[Decoder[UsedCall]]( - Decoder[ObservedCall].widen, - Decoder[ObservedCallWithArgPos].widen - ).reduceLeft(_.or(_)) - - /** Describes types defined within the application. - * - * @param name - * name of the type. 
- * @param fields - * the static or object fields. - * @param procedures - * defined, named procedures within the type. - */ - case class UserDefinedType( - name: String, - fields: List[LocalDef], - procedures: List[ObservedCall], - fileName: String = "", - lineNumber: Option[Int] = None, - columnNumber: Option[Int] = None + end match + end fromNode + end DefComponent + + /** Call details in the usage slice. + * + * @param callName + * the name of the call. + * @param resolvedMethod + * the method full name if the call is resolved. + * @param paramTypes + * the observed parameter types. + * @param returnType + * the observed return type. + */ + sealed abstract class UsedCall( + callName: String, + resolvedMethod: Option[String], + paramTypes: List[String], + returnType: String, + isExternal: Option[Boolean], + lineNumber: Option[Int] = None, + columnNumber: Option[Int] = None + ): + override def toString: String = + s"$callName(${paramTypes.mkString(",")}):$returnType" + + /** Details related to an observed call. 
+ */ + case class ObservedCall( + callName: String, + resolvedMethod: Option[String], + paramTypes: List[String], + returnType: String, + isExternal: Option[Boolean], + lineNumber: Option[Int] = None, + columnNumber: Option[Int] = None + ) extends UsedCall( + callName, + resolvedMethod, + paramTypes, + returnType, + isExternal, + lineNumber, + columnNumber + ) + + implicit val decodeObservedCall: Decoder[ObservedCall] = + (c: HCursor) => + for + x <- c.downField("callName").as[String] + m <- c.downField("resolvedMethod").as[Option[String]] + p <- c.downField("paramTypes").as[List[String]] + r <- c.downField("returnType").as[String] + ex <- c.downField("isExternal").as[Option[Boolean]] + lin <- c.downField("lineNumber").as[Option[Int]] + col <- c.downField("columnNumber").as[Option[Int]] + yield ObservedCall(x, m, p, r, ex, lin, col) + implicit val encodeObservedCall: Encoder[ObservedCall] = + Encoder.instance { case ObservedCall(c, m, p, r, ex, lin, col) => + Json.obj( + "callName" -> c.asJson, + "resolvedMethod" -> m.asJson, + "paramTypes" -> p.asJson, + "returnType" -> r.asJson, + "isExternal" -> ex.asJson, + "lineNumber" -> lin.asJson, + "columnNumber" -> col.asJson + ) + } + + /** Extends observed call with a specific argument in mind. + * + * @param position + * adds the argument position as either a named argument or positional argument. 
+ */ + case class ObservedCallWithArgPos( + callName: String, + resolvedMethod: Option[String], + paramTypes: List[String], + returnType: String, + position: Either[String, Int], + isExternal: Option[Boolean], + lineNumber: Option[Int] = None, + columnNumber: Option[Int] = None + ) extends UsedCall( + callName, + resolvedMethod, + paramTypes, + returnType, + isExternal, + lineNumber, + columnNumber + ): + override def toString: String = super.toString + " @ " + (position match + case Left(namedArg) => namedArg + case Right(argIdx) => argIdx ) + end ObservedCallWithArgPos + + object ObservedCallWithArgPos: + def fromObservedCall(oc: ObservedCall, pos: Either[String, Int]): ObservedCallWithArgPos = + ObservedCallWithArgPos( + oc.callName, + oc.resolvedMethod, + oc.paramTypes, + oc.returnType, + pos, + oc.isExternal, + oc.lineNumber, + oc.columnNumber + ) - implicit val decodeUserDefinedType: Decoder[UserDefinedType] = - (c: HCursor) => - for - n <- c.downField("name").as[String] - f <- c.downField("fields").as[List[LocalDef]] - p <- c.downField("procedures").as[List[ObservedCall]] - fn <- c.downField("fileName").as[String] - lin <- c.downField("lineNumber").as[Option[Int]] - col <- c.downField("columnNumber").as[Option[Int]] - yield UserDefinedType(n, f, p, fn, lin, col) - implicit val encodeUserDefinedType: Encoder[UserDefinedType] = - Encoder.instance { case UserDefinedType(n, f, p, fn, lin, col) => - Json.obj( - "name" -> n.asJson, - "fields" -> f.asJson, - "procedures" -> p.asJson, - "fileName" -> fn.asJson, - "lineNumber" -> lin.asJson, - "columnNumber" -> col.asJson - ) - } - - /** The program usage slices and UDTs. - * - * @param objectSlices - * the object slices under each procedure - * @param userDefinedTypes - * the UDTs. 
- */ - case class ProgramUsageSlice( - objectSlices: List[MethodUsageSlice], - userDefinedTypes: List[UserDefinedType] - ) extends ProgramSlice: - - def toJson: String = this.asJson.noSpaces - - def toJsonPretty: String = this.asJson.spaces2 - - implicit val decodeProgramUsageSlice: Decoder[ProgramUsageSlice] = - (c: HCursor) => - for - o <- c.downField("objectSlices").as[List[MethodUsageSlice]] - u <- c.downField("userDefinedTypes").as[List[UserDefinedType]] - yield ProgramUsageSlice(o, u) - implicit val encodeProgramUsageSlice: Encoder[ProgramUsageSlice] = Encoder.instance { - case ProgramUsageSlice(os, udts) => - Json.obj("objectSlices" -> os.asJson, "userDefinedTypes" -> udts.asJson) - } + implicit val decodeObservedCallWithArgPos: Decoder[ObservedCallWithArgPos] = + (c: HCursor) => + for + x <- c.downField("callName").as[String] + m <- c.downField("resolvedMethod").as[Option[String]] + p <- c.downField("paramTypes").as[List[String]] + r <- c.downField("returnType").as[String] + ex <- c.downField("isExternal").as[Option[Boolean]] + lin <- c.downField("lineNumber").as[Option[Int]] + col <- c.downField("columnNumber").as[Option[Int]] + yield + val pos = c.downField("position").as[Int] match + case Left(_) => + c.downField("position").as[String] match + case Left(err) => + throw new RuntimeException( + "Unable to decode `position` as the field is neither a string nor an integer", + err + ) + case Right(argName) => Left(argName) + case Right(argIdx) => Right(argIdx) + ObservedCallWithArgPos(x, m, p, r, pos, ex, lin, col) + implicit val encodeObservedCallWithArgPos: Encoder[ObservedCallWithArgPos] = + Encoder.instance { case ObservedCallWithArgPos(c, m, p, r, a, ex, lin, col) => + Json.obj( + "callName" -> c.asJson, + "resolvedMethod" -> m.asJson, + "paramTypes" -> p.asJson, + "returnType" -> r.asJson, + "position" -> (a match + case Left(argName) => argName.asJson + case Right(argIdx) => argIdx.asJson + ), + "isExternal" -> ex.asJson, + "lineNumber" -> lin.asJson, 
+ "columnNumber" -> col.asJson + ) + } + + implicit val encodeUsedCall: Encoder[UsedCall] = Encoder.instance { + case oc @ ObservedCall(_, _, _, _, _, _, _) => oc.asJson + case oca @ ObservedCallWithArgPos(_, _, _, _, _, _, _, _) => oca.asJson + } + + implicit val decodeUsedCall: Decoder[UsedCall] = + List[Decoder[UsedCall]]( + Decoder[ObservedCall].widen, + Decoder[ObservedCallWithArgPos].widen + ).reduceLeft(_.or(_)) + + /** Describes types defined within the application. + * + * @param name + * name of the type. + * @param fields + * the static or object fields. + * @param procedures + * defined, named procedures within the type. + */ + case class UserDefinedType( + name: String, + fields: List[LocalDef], + procedures: List[ObservedCall], + fileName: String = "", + lineNumber: Option[Int] = None, + columnNumber: Option[Int] = None + ) + + implicit val decodeUserDefinedType: Decoder[UserDefinedType] = + (c: HCursor) => + for + n <- c.downField("name").as[String] + f <- c.downField("fields").as[List[LocalDef]] + p <- c.downField("procedures").as[List[ObservedCall]] + fn <- c.downField("fileName").as[String] + lin <- c.downField("lineNumber").as[Option[Int]] + col <- c.downField("columnNumber").as[Option[Int]] + yield UserDefinedType(n, f, p, fn, lin, col) + implicit val encodeUserDefinedType: Encoder[UserDefinedType] = + Encoder.instance { case UserDefinedType(n, f, p, fn, lin, col) => + Json.obj( + "name" -> n.asJson, + "fields" -> f.asJson, + "procedures" -> p.asJson, + "fileName" -> fn.asJson, + "lineNumber" -> lin.asJson, + "columnNumber" -> col.asJson + ) + } + + /** The program usage slices and UDTs. + * + * @param objectSlices + * the object slices under each procedure + * @param userDefinedTypes + * the UDTs. 
+ */ + case class ProgramUsageSlice( + objectSlices: List[MethodUsageSlice], + userDefinedTypes: List[UserDefinedType] + ) extends ProgramSlice: + + def toJson: String = this.asJson.noSpaces + + def toJsonPretty: String = this.asJson.spaces2 + + implicit val decodeProgramUsageSlice: Decoder[ProgramUsageSlice] = + (c: HCursor) => + for + o <- c.downField("objectSlices").as[List[MethodUsageSlice]] + u <- c.downField("userDefinedTypes").as[List[UserDefinedType]] + yield ProgramUsageSlice(o, u) + implicit val encodeProgramUsageSlice: Encoder[ProgramUsageSlice] = Encoder.instance { + case ProgramUsageSlice(os, udts) => + Json.obj("objectSlices" -> os.asJson, "userDefinedTypes" -> udts.asJson) + } end slicing diff --git a/wrapper/nodejs/package-lock.json b/wrapper/nodejs/package-lock.json index c6ddb37..f4f8a86 100644 --- a/wrapper/nodejs/package-lock.json +++ b/wrapper/nodejs/package-lock.json @@ -1,12 +1,12 @@ { "name": "@appthreat/atom", - "version": "2.0.16", + "version": "2.0.17", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@appthreat/atom", - "version": "2.0.16", + "version": "2.0.17", "license": "Apache-2.0", "dependencies": { "@babel/parser": "^7.24.7", diff --git a/wrapper/nodejs/package.json b/wrapper/nodejs/package.json index 3644b4d..ed76736 100644 --- a/wrapper/nodejs/package.json +++ b/wrapper/nodejs/package.json @@ -1,6 +1,6 @@ { "name": "@appthreat/atom", - "version": "2.0.16", + "version": "2.0.17", "description": "Create atom (⚛) representation for your application, packages and libraries", "exports": "./index.js", "type": "module",