From 4d6c01a96ddaba16ecd6bbe5dc12239049929c8c Mon Sep 17 00:00:00 2001 From: Prabhu Subramanian Date: Thu, 5 Oct 2023 11:55:50 +0100 Subject: [PATCH] Input and output tagging Signed-off-by: Prabhu Subramanian --- build.sbt | 2 +- .../dataflowengineoss/language/Path.scala | 36 ++++-- .../passes/ConfigFileCreationPass.scala | 2 + .../x2cpg/src/main/resources/tags-vocab.txt | 79 ++++++++++++ .../frontend/XConfigFileCreationPass.scala | 6 +- .../x2cpg/passes/taggers/CdxPass.scala | 121 +++++------------- .../chencli/console/ChenConsole.scala | 6 - .../chencli/console/Predefined.scala | 11 ++ pyproject.toml | 2 +- 9 files changed, 152 insertions(+), 113 deletions(-) create mode 100644 platform/frontends/x2cpg/src/main/resources/tags-vocab.txt diff --git a/build.sbt b/build.sbt index 53cbbc6..8a06caf 100644 --- a/build.sbt +++ b/build.sbt @@ -1,6 +1,6 @@ name := "chen" ThisBuild / organization := "io.appthreat" -ThisBuild / version := "0.0.11" +ThisBuild / version := "0.0.12" ThisBuild / scalaVersion := "3.3.1" val cpgVersion = "1.4.22" diff --git a/dataflowengineoss/src/main/scala/io/appthreat/dataflowengineoss/language/Path.scala b/dataflowengineoss/src/main/scala/io/appthreat/dataflowengineoss/language/Path.scala index 2f43423..5767c60 100644 --- a/dataflowengineoss/src/main/scala/io/appthreat/dataflowengineoss/language/Path.scala +++ b/dataflowengineoss/src/main/scala/io/appthreat/dataflowengineoss/language/Path.scala @@ -34,14 +34,17 @@ object Path { // TODO replace with dynamic rendering based on the terminal's width, e.g. in scala-repl-pp lazy val maxTrackedWidth = sys.env.get("CHEN_DATAFLOW_TRACKED_WIDTH").map(_.toInt).getOrElse(DefaultMaxTrackedWidth) + private def tagAsString(tag: Iterator[Tag]) = + if (tag.nonEmpty) tag.filterNot(_.name == "purl").name.mkString(", ") else "" + implicit val show: Show[Path] = { path => var caption = "" if (path.elements.size > 2) { val srcNode = path.elements.head - val srcTags = if (srcNode.tag.nonEmpty) srcNode.tag.filterNot(_.name == "purl").mkString(", ") else "" + val srcTags = tagAsString(srcNode.tag) val sinkNode = path.elements.last var sinkCode = sinkNode.code - val sinkTags = if (sinkNode.tag.nonEmpty) sinkNode.tag.filterNot(_.name == "purl").mkString(", ") else "" + val sinkTags = tagAsString(sinkNode.tag) sinkNode match { case cfgNode: CfgNode => val method = cfgNode.method @@ -59,16 +62,16 @@ object Path { val lineNumber = astNode.lineNumber.getOrElse("").toString val fileName = astNode.file.name.headOption.getOrElse("").replace("", "") var fileLocation = s"${fileName}#${lineNumber}" - var tags: String = if (astNode.tag.nonEmpty) astNode.tag.filterNot(_.name == "purl").name.mkString(", ") else "" + var tags: String = tagAsString(astNode.tag) if (fileLocation == "#") fileLocation = "N/A" astNode match { case methodParameterIn: MethodParameterIn => val methodName = methodParameterIn.method.name if (tags.isEmpty && methodParameterIn.method.tag.nonEmpty) { - tags = methodParameterIn.method.tag.filterNot(_.name == "purl").name.mkString(", ") + tags = tagAsString(methodParameterIn.method.tag) } if (tags.isEmpty && methodParameterIn.tag.nonEmpty) { - tags = methodParameterIn.tag.filterNot(_.name == "purl").name.mkString(", ") + tags = tagAsString(methodParameterIn.tag) } tableRows += Array[String]( "methodParameterIn", @@ -79,12 +82,15 @@ object Path { else ""), tags ) + case ret: Return => + val methodName = ret.method.name + tableRows += Array[String]("return", fileLocation, methodName, ret.argumentName.getOrElse(""), ret.code, tags) case identifier: Identifier => val methodName = identifier.method.name if (tags.isEmpty && identifier.inCall.nonEmpty && identifier.inCall.head.tag.nonEmpty) { - tags = identifier.inCall.head.tag.filterNot(_.name == "purl").name.mkString(", ") + tags = tagAsString(identifier.inCall.head.tag) } - if (!addedPaths.contains(s"${fileName}#${lineNumber}")) { + if (!addedPaths.contains(s"${fileName}#${lineNumber}") && identifier.inCall.nonEmpty) { tableRows += Array[String]( "identifier", fileLocation, @@ -107,7 +113,7 @@ object Path { ) && !call.name .startsWith(" val method = cfgNode.method if (tags.isEmpty && method.tag.nonEmpty) { - tags = method.tag.filterNot(_.name == "purl").name.mkString(", ") + tags = tagAsString(method.tag) } val methodName = method.name val statement = cfgNode match { case _: MethodParameterIn => if (tags.isEmpty && method.parameter.tag.nonEmpty) { - tags = method.parameter.tag.filterNot(_.name == "purl").name.mkString(", ") + tags = tagAsString(method.parameter.tag) } val paramsPretty = method.parameter.toList.sortBy(_.index).map(_.code).mkString(", ") s"$methodName($paramsPretty)" case _ => if (tags.isEmpty && cfgNode.statement.tag.nonEmpty) { - tags = cfgNode.statement.tag.filterNot(_.name == "purl").name.mkString(", ") + tags = tagAsString(cfgNode.statement.tag) } cfgNode.statement.repr } @@ -166,9 +172,11 @@ object Path { { val end_section = row.head == "call" val trow: Array[String] = row.tail - val tagsStr: String = if (trow(4).nonEmpty) s"Tags: ${trow(4)}" else "" - val methodStr = s"${trow(1)}\n${tagsStr}" - table.add_row(trow(0), methodStr.stripMargin, trow(2), trow(3), end_section = end_section) + if (trow(3) != "RET" && !trow(4).startsWith(".fieldAccess")) { + val tagsStr: String = if (trow(4).nonEmpty) s"Tags: ${trow(4)}" else "" + val methodStr = s"${trow(1)}\n${tagsStr}" + table.add_row(trow(0), methodStr.stripMargin, trow(2), trow(3), end_section = end_section) + } } } richConsole.print(table) diff --git a/platform/frontends/javasrc2cpg/src/main/scala/io/appthreat/javasrc2cpg/passes/ConfigFileCreationPass.scala b/platform/frontends/javasrc2cpg/src/main/scala/io/appthreat/javasrc2cpg/passes/ConfigFileCreationPass.scala index 2aa87f6..e194753 100644 --- a/platform/frontends/javasrc2cpg/src/main/scala/io/appthreat/javasrc2cpg/passes/ConfigFileCreationPass.scala +++ b/platform/frontends/javasrc2cpg/src/main/scala/io/appthreat/javasrc2cpg/passes/ConfigFileCreationPass.scala @@ -9,6 +9,8 @@ class ConfigFileCreationPass(cpg: Cpg) extends XConfigFileCreationPass(cpg) { override val configFileFilters: List[File => Boolean] = List( // JAVA_INTERNAL extensionFilter(".properties"), + // HTML + pathRegexFilter(".*resources/templates.*.html"), // JSP extensionFilter(".jsp"), // Velocity files, see https://velocity.apache.org diff --git a/platform/frontends/x2cpg/src/main/resources/tags-vocab.txt b/platform/frontends/x2cpg/src/main/resources/tags-vocab.txt new file mode 100644 index 0000000..0796a24 --- /dev/null +++ b/platform/frontends/x2cpg/src/main/resources/tags-vocab.txt @@ -0,0 +1,79 @@ +sql +http +xml +web +security +database +json +yaml +validation +sanitization +cloud +iam +auth +middleware +serialization +event +stream +rpc +socket +proto +resource +data +sensitive +template +log +service +api +slf4j +parse +emit +jdbc +connection +pool +beans +transaction +mysql +postgres +oracle +mongo +redis +splunk +stripe +payment +finance +currency +coin +monero +ssl +traffic +mvc +html +escape +rest +tomcat +jackson +hibernate +orm +aop +jwt +saml +token +tls +codec +cron +crypto +jce +certificate +developer +tools +autoconfigure +test +jsonpath +bytecode +mock +injection +comparators +transform +encode +decode \ No newline at end of file diff --git a/platform/frontends/x2cpg/src/main/scala/io/appthreat/x2cpg/passes/frontend/XConfigFileCreationPass.scala b/platform/frontends/x2cpg/src/main/scala/io/appthreat/x2cpg/passes/frontend/XConfigFileCreationPass.scala index 9f794c6..63df372 100644 --- a/platform/frontends/x2cpg/src/main/scala/io/appthreat/x2cpg/passes/frontend/XConfigFileCreationPass.scala +++ b/platform/frontends/x2cpg/src/main/scala/io/appthreat/x2cpg/passes/frontend/XConfigFileCreationPass.scala @@ -4,7 +4,7 @@ import better.files.File import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.codepropertygraph.generated.nodes.NewConfigFile import io.shiftleft.passes.ConcurrentWriterCpgPass -import io.shiftleft.semanticcpg.language._ +import io.shiftleft.semanticcpg.language.* import io.shiftleft.utils.IOUtils import org.slf4j.LoggerFactory @@ -69,6 +69,10 @@ abstract class XConfigFileCreationPass(cpg: Cpg) extends ConcurrentWriterCpgPass file.canonicalPath.endsWith(pathEnd) } + protected def pathRegexFilter(pathRegex: String)(file: File): Boolean = { + file.canonicalPath.matches(pathRegex) + } + private def isConfigFile(file: File): Boolean = { configFileFilters.exists(predicate => predicate(file)) } diff --git a/platform/frontends/x2cpg/src/main/scala/io/appthreat/x2cpg/passes/taggers/CdxPass.scala b/platform/frontends/x2cpg/src/main/scala/io/appthreat/x2cpg/passes/taggers/CdxPass.scala index 376c1b3..36beb73 100644 --- a/platform/frontends/x2cpg/src/main/scala/io/appthreat/x2cpg/passes/taggers/CdxPass.scala +++ b/platform/frontends/x2cpg/src/main/scala/io/appthreat/x2cpg/passes/taggers/CdxPass.scala @@ -3,125 +3,66 @@ package io.appthreat.x2cpg.passes.taggers import io.circe.* import io.circe.parser.* import io.shiftleft.codepropertygraph.Cpg -import io.shiftleft.codepropertygraph.generated.EdgeTypes -import io.shiftleft.codepropertygraph.generated.nodes.NewNamespace import io.shiftleft.passes.CpgPass import io.shiftleft.semanticcpg.language.* import java.util.regex.Pattern +import scala.io.Source /** Creates tags on typeDecl and call nodes based on a cdx document */ class CdxPass(cpg: Cpg) extends CpgPass(cpg) { - // Some hardcoded list of keywords to look for in the description. Hopefully this would be performed with a better category tagger in the future - private val keywords = Seq( - "sql", - "http", - "xml", - "web", - "security", - "database", - "json", - "yaml", - "validation", - "sanitization", - "cloud", - "iam", - "auth", - "middleware", - "serialization", - "event", - "stream", - "rpc", - "socket", - "proto", - "resource", - "data", - "sensitive", - "template", - "log", - "service", - "api", - "slf4j", - "parse", - "emit", - "jdbc", - "connection", - "pool", - "beans", - "transaction", - "mysql", - "postgres", - "oracle", - "mongo", - "redis", - "splunk", - "stripe", - "payment", - "finance", - "currency", - "coin", - "monero", - "ssl", - "traffic", - "mvc", - "html", - "escape", - "rest", - "tomcat", - "jackson", - "hibernate", - "orm", - "aop", - "jwt", - "saml", - "token", - "tls", - "codec", - "cron", - "crypto", - "jce", - "certificate", - "developer", - "tools", - "autoconfigure", - "test", - "jsonpath", - "bytecode", - "mock", - "injection" - ) + // tags list as a seed + private val keywords: List[String] = Source.fromResource("tags-vocab.txt").getLines.toList + + private def containsRegex(str: String) = Pattern.quote(str) == str + + private val BOM_JSON_FILE = "bom.json" override def run(dstGraph: DiffGraphBuilder): Unit = { - cpg.configFile("bom.json").content.foreach { cdxData => + cpg.configFile(BOM_JSON_FILE).content.foreach { cdxData => val cdxJson = parse(cdxData).getOrElse(Json.Null) val cursor: HCursor = cdxJson.hcursor val components = cursor.downField("components").focus.flatMap(_.asArray).getOrElse(Vector.empty) components.foreach { comp => - val compPurl = comp.hcursor.downField("purl").as[String].getOrElse("") + val PURL_TYPE = "purl" + val compPurl = comp.hcursor.downField(PURL_TYPE).as[String].getOrElse("") val compType = comp.hcursor.downField("type").as[String].getOrElse("") val compDescription: String = comp.hcursor.downField("description").as[String].getOrElse("") - val descTags = keywords.filter(compDescription.toLowerCase().contains(_)) + val descTags = keywords.filter(k => compDescription.toLowerCase().contains(" " + k)) val properties = comp.hcursor.downField("properties").focus.flatMap(_.asArray).getOrElse(Vector.empty) properties.foreach { ns => val nsstr = ns.hcursor.downField("value").as[String].getOrElse("") - nsstr.split("\n").foreach { (pkg: String) => + nsstr.split("\n").filterNot(_.contains("test")).filterNot(_.contains("mock")).foreach { (pkg: String) => val bpkg = pkg.takeWhile(_ != '$') - cpg.call.typeFullNameExact(bpkg).newTagNodePair("purl", compPurl).store()(dstGraph) - cpg.method.parameter.typeFullNameExact(bpkg).newTagNodePair("purl", compPurl).store()(dstGraph) - if (!bpkg.contains("[") && !bpkg.contains("*")) - cpg.method.fullName(s"${Pattern.quote(bpkg)}.*").newTagNodePair("purl", compPurl).store()(dstGraph) + cpg.call.typeFullNameExact(bpkg).newTagNodePair(PURL_TYPE, compPurl).store()(dstGraph) + cpg.method.parameter.typeFullNameExact(bpkg).newTagNodePair(PURL_TYPE, compPurl).store()(dstGraph) + if (!containsRegex(bpkg)) + cpg.method.fullName(s"${Pattern.quote(bpkg)}.*").newTagNodePair(PURL_TYPE, compPurl).store()(dstGraph) if (compType != "library") { cpg.call.typeFullNameExact(bpkg).newTagNode(compType).store()(dstGraph) + cpg.call.typeFullNameExact(bpkg).receiver.newTagNode(s"$compType-value").store()(dstGraph) cpg.method.parameter.typeFullNameExact(bpkg).newTagNode(compType).store()(dstGraph) - if (!bpkg.contains("[") && !bpkg.contains("*")) + if (!containsRegex(bpkg)) cpg.method.fullName(s"${Pattern.quote(bpkg)}.*").newTagNode(compType).store()(dstGraph) } + if (compType == "framework") { + def frameworkAnnotatedMethod = cpg.annotation + .fullNameExact(bpkg) + .method + frameworkAnnotatedMethod.parameter + .newTagNode(s"$compType-input") + .store()(dstGraph) + cpg.ret + .where(_.method.annotation.fullNameExact(bpkg)) + .newTagNode(s"$compType-output") + .store()(dstGraph) + } descTags.foreach { t => cpg.call.typeFullNameExact(bpkg).newTagNode(t).store()(dstGraph) cpg.method.parameter.typeFullNameExact(bpkg).newTagNode(t).store()(dstGraph) - if (!bpkg.contains("[") && !bpkg.contains("*")) + if (!containsRegex(bpkg)) cpg.method.fullName(s"${Pattern.quote(bpkg)}.*").newTagNode(t).store()(dstGraph) } } diff --git a/platform/src/main/scala/io/appthreat/chencli/console/ChenConsole.scala b/platform/src/main/scala/io/appthreat/chencli/console/ChenConsole.scala index 020ee0e..7ac6992 100644 --- a/platform/src/main/scala/io/appthreat/chencli/console/ChenConsole.scala +++ b/platform/src/main/scala/io/appthreat/chencli/console/ChenConsole.scala @@ -50,22 +50,16 @@ object ChenConsole { def banner(): String = s""" - | | _ _ _ _ _ __ |/ |_ _ ._ ._ _. o |_ / \\ / \\ / \\ / |_|_ |\\_ | | (/_ | | | | (_| | |_) \\_/ \\_/ \\_/ / | | - | |Version: $version - |$helpMsg """.stripMargin def version: String = getClass.getPackage.getImplementationVersion - private def helpMsg: String = - s"""Type `help` to begin""".stripMargin - def defaultConfig: ConsoleConfig = new ConsoleConfig() } diff --git a/platform/src/main/scala/io/appthreat/chencli/console/Predefined.scala b/platform/src/main/scala/io/appthreat/chencli/console/Predefined.scala index 85d5df8..973fadc 100644 --- a/platform/src/main/scala/io/appthreat/chencli/console/Predefined.scala +++ b/platform/src/main/scala/io/appthreat/chencli/console/Predefined.scala @@ -8,6 +8,7 @@ object Predefined { Seq( "import _root_.io.appthreat.console._", "import _root_.io.appthreat.chencli.console.ChenConsole._", + "import _root_.io.appthreat.chencli.console.Chen.context", "import _root_.io.shiftleft.codepropertygraph.Cpg", "import _root_.io.shiftleft.codepropertygraph.Cpg.docSearchPackages", "import _root_.io.shiftleft.codepropertygraph.cpgloading._", @@ -20,7 +21,17 @@ object Predefined { "import overflowdb.traversal.{`package` => _, help => _, _}", "import scala.jdk.CollectionConverters._", """ + |def reachables(sourceTag: String, sinkTag: String)(implicit atom: Cpg): Unit = { + | try { + | def source=atom.tag.name(sourceTag).parameter + | def sink=atom.ret.where(_.tag.name(sinkTag)) + | sink.df(source).t + | } catch { + | case exc: Exception => + | } + |} | + |def reachables(implicit atom: Cpg): Unit = reachables("framework-input", "framework-output") | |""".stripMargin ) diff --git a/pyproject.toml b/pyproject.toml index 69cd6a2..f14299b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "appthreat-chen" -version = "0.0.11" +version = "0.0.12" description = "Code Hierarchy Exploration Net (chen)" authors = ["Team AppThreat "] license = "Apache-2.0"