Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Input and output tagging #11

Merged
merged 1 commit into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name := "chen"
ThisBuild / organization := "io.appthreat"
ThisBuild / version := "0.0.11"
ThisBuild / version := "0.0.12"
ThisBuild / scalaVersion := "3.3.1"

val cpgVersion = "1.4.22"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,17 @@ object Path {
// TODO replace with dynamic rendering based on the terminal's width, e.g. in scala-repl-pp
lazy val maxTrackedWidth = sys.env.get("CHEN_DATAFLOW_TRACKED_WIDTH").map(_.toInt).getOrElse(DefaultMaxTrackedWidth)

/** Renders the names of the given tags as a comma-separated string, leaving out every tag whose name is "purl".
  * An empty input yields the empty string.
  */
private def tagAsString(tag: Iterator[Tag]) =
  if (tag.isEmpty) ""
  else {
    // purl tags carry package URLs rather than category labels, so they are not shown
    val visibleTags = tag.filterNot(_.name == "purl")
    visibleTags.name.mkString(", ")
  }

implicit val show: Show[Path] = { path =>
var caption = ""
if (path.elements.size > 2) {
val srcNode = path.elements.head
val srcTags = if (srcNode.tag.nonEmpty) srcNode.tag.filterNot(_.name == "purl").mkString(", ") else ""
val srcTags = tagAsString(srcNode.tag)
val sinkNode = path.elements.last
var sinkCode = sinkNode.code
val sinkTags = if (sinkNode.tag.nonEmpty) sinkNode.tag.filterNot(_.name == "purl").mkString(", ") else ""
val sinkTags = tagAsString(sinkNode.tag)
sinkNode match {
case cfgNode: CfgNode =>
val method = cfgNode.method
Expand All @@ -59,16 +62,16 @@ object Path {
val lineNumber = astNode.lineNumber.getOrElse("").toString
val fileName = astNode.file.name.headOption.getOrElse("").replace("<unknown>", "")
var fileLocation = s"${fileName}#${lineNumber}"
var tags: String = if (astNode.tag.nonEmpty) astNode.tag.filterNot(_.name == "purl").name.mkString(", ") else ""
var tags: String = tagAsString(astNode.tag)
if (fileLocation == "#") fileLocation = "N/A"
astNode match {
case methodParameterIn: MethodParameterIn =>
val methodName = methodParameterIn.method.name
if (tags.isEmpty && methodParameterIn.method.tag.nonEmpty) {
tags = methodParameterIn.method.tag.filterNot(_.name == "purl").name.mkString(", ")
tags = tagAsString(methodParameterIn.method.tag)
}
if (tags.isEmpty && methodParameterIn.tag.nonEmpty) {
tags = methodParameterIn.tag.filterNot(_.name == "purl").name.mkString(", ")
tags = tagAsString(methodParameterIn.tag)
}
tableRows += Array[String](
"methodParameterIn",
Expand All @@ -79,12 +82,15 @@ object Path {
else ""),
tags
)
case ret: Return =>
val methodName = ret.method.name
tableRows += Array[String]("return", fileLocation, methodName, ret.argumentName.getOrElse(""), ret.code, tags)
case identifier: Identifier =>
val methodName = identifier.method.name
if (tags.isEmpty && identifier.inCall.nonEmpty && identifier.inCall.head.tag.nonEmpty) {
tags = identifier.inCall.head.tag.filterNot(_.name == "purl").name.mkString(", ")
tags = tagAsString(identifier.inCall.head.tag)
}
if (!addedPaths.contains(s"${fileName}#${lineNumber}")) {
if (!addedPaths.contains(s"${fileName}#${lineNumber}") && identifier.inCall.nonEmpty) {
tableRows += Array[String](
"identifier",
fileLocation,
Expand All @@ -107,7 +113,7 @@ object Path {
) && !call.name
.startsWith("<operator") && !call.methodFullName.startsWith("new ")
) {
tags = call.callee(NoResolve).head.tag.filterNot(_.name == "purl").name.mkString(", ")
tags = tagAsString(call.callee(NoResolve).head.tag)
}
var callIcon =
if (
Expand All @@ -128,19 +134,19 @@ object Path {
case cfgNode: CfgNode =>
val method = cfgNode.method
if (tags.isEmpty && method.tag.nonEmpty) {
tags = method.tag.filterNot(_.name == "purl").name.mkString(", ")
tags = tagAsString(method.tag)
}
val methodName = method.name
val statement = cfgNode match {
case _: MethodParameterIn =>
if (tags.isEmpty && method.parameter.tag.nonEmpty) {
tags = method.parameter.tag.filterNot(_.name == "purl").name.mkString(", ")
tags = tagAsString(method.parameter.tag)
}
val paramsPretty = method.parameter.toList.sortBy(_.index).map(_.code).mkString(", ")
s"$methodName($paramsPretty)"
case _ =>
if (tags.isEmpty && cfgNode.statement.tag.nonEmpty) {
tags = cfgNode.statement.tag.filterNot(_.name == "purl").name.mkString(", ")
tags = tagAsString(cfgNode.statement.tag)
}
cfgNode.statement.repr
}
Expand All @@ -166,9 +172,11 @@ object Path {
{
val end_section = row.head == "call"
val trow: Array[String] = row.tail
val tagsStr: String = if (trow(4).nonEmpty) s"Tags: ${trow(4)}" else ""
val methodStr = s"${trow(1)}\n${tagsStr}"
table.add_row(trow(0), methodStr.stripMargin, trow(2), trow(3), end_section = end_section)
if (trow(3) != "RET" && !trow(4).startsWith("<operator>.fieldAccess")) {
val tagsStr: String = if (trow(4).nonEmpty) s"Tags: ${trow(4)}" else ""
val methodStr = s"${trow(1)}\n${tagsStr}"
table.add_row(trow(0), methodStr.stripMargin, trow(2), trow(3), end_section = end_section)
}
}
}
richConsole.print(table)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ class ConfigFileCreationPass(cpg: Cpg) extends XConfigFileCreationPass(cpg) {
override val configFileFilters: List[File => Boolean] = List(
// JAVA_INTERNAL
extensionFilter(".properties"),
// HTML
pathRegexFilter(".*resources/templates.*.html"),
// JSP
extensionFilter(".jsp"),
// Velocity files, see https://velocity.apache.org
Expand Down
79 changes: 79 additions & 0 deletions platform/frontends/x2cpg/src/main/resources/tags-vocab.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
sql
http
xml
web
security
database
json
yaml
validation
sanitization
cloud
iam
auth
middleware
serialization
event
stream
rpc
socket
proto
resource
data
sensitive
template
log
service
api
slf4j
parse
emit
jdbc
connection
pool
beans
transaction
mysql
postgres
oracle
mongo
redis
splunk
stripe
payment
finance
currency
coin
monero
ssl
traffic
mvc
html
escape
rest
tomcat
jackson
hibernate
orm
aop
jwt
saml
token
tls
codec
cron
crypto
jce
certificate
developer
tools
autoconfigure
test
jsonpath
bytecode
mock
injection
comparators
transform
encode
decode
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import better.files.File
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.NewConfigFile
import io.shiftleft.passes.ConcurrentWriterCpgPass
import io.shiftleft.semanticcpg.language._
import io.shiftleft.semanticcpg.language.*
import io.shiftleft.utils.IOUtils
import org.slf4j.LoggerFactory

Expand Down Expand Up @@ -69,6 +69,10 @@ abstract class XConfigFileCreationPass(cpg: Cpg) extends ConcurrentWriterCpgPass
file.canonicalPath.endsWith(pathEnd)
}

/** Curried file predicate: accepts `file` when its canonical path matches `pathRegex`.
  *
  * NOTE(review): assuming `canonicalPath` is a plain `String`, `matches` requires the whole path to match the
  * expression, so patterns usually need a leading `.*`.
  */
protected def pathRegexFilter(pathRegex: String)(file: File): Boolean =
  file.canonicalPath.matches(pathRegex)

/** A file counts as a config file as soon as any of the registered `configFileFilters` accepts it. */
private def isConfigFile(file: File): Boolean =
  configFileFilters.exists(_.apply(file))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,125 +3,66 @@ package io.appthreat.x2cpg.passes.taggers
import io.circe.*
import io.circe.parser.*
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.EdgeTypes
import io.shiftleft.codepropertygraph.generated.nodes.NewNamespace
import io.shiftleft.passes.CpgPass
import io.shiftleft.semanticcpg.language.*

import java.util.regex.Pattern
import scala.io.Source

/** Creates tags on typeDecl and call nodes based on a cdx document
*/
class CdxPass(cpg: Cpg) extends CpgPass(cpg) {

// Some hardcoded list of keywords to look for in the description. Hopefully this would be performed with a better category tagger in the future
private val keywords = Seq(
"sql",
"http",
"xml",
"web",
"security",
"database",
"json",
"yaml",
"validation",
"sanitization",
"cloud",
"iam",
"auth",
"middleware",
"serialization",
"event",
"stream",
"rpc",
"socket",
"proto",
"resource",
"data",
"sensitive",
"template",
"log",
"service",
"api",
"slf4j",
"parse",
"emit",
"jdbc",
"connection",
"pool",
"beans",
"transaction",
"mysql",
"postgres",
"oracle",
"mongo",
"redis",
"splunk",
"stripe",
"payment",
"finance",
"currency",
"coin",
"monero",
"ssl",
"traffic",
"mvc",
"html",
"escape",
"rest",
"tomcat",
"jackson",
"hibernate",
"orm",
"aop",
"jwt",
"saml",
"token",
"tls",
"codec",
"cron",
"crypto",
"jce",
"certificate",
"developer",
"tools",
"autoconfigure",
"test",
"jsonpath",
"bytecode",
"mock",
"injection"
)
// Seed vocabulary of tag keywords, read one per line from the bundled tags-vocab.txt resource.
// Using.resource closes the underlying stream once the lines have been materialised (the original left it open).
// Blank lines are dropped so that an empty keyword can never match every description.
private val keywords: List[String] =
  scala.util.Using.resource(Source.fromResource("tags-vocab.txt")) { vocab =>
    vocab.getLines().map(_.trim).filter(_.nonEmpty).toList
  }

/** Tells whether `str` contains characters that make it unsuitable as a literal method full-name prefix, namely
  * `[` (array types) or `*` (wildcards).
  *
  * The previous implementation compared `Pattern.quote(str)` with `str`. `Pattern.quote` always wraps its argument
  * in `\Q...\E`, so that comparison was never true and the check always returned false.
  */
private def containsRegex(str: String): Boolean =
  str.contains("[") || str.contains("*")

private val BOM_JSON_FILE = "bom.json"

override def run(dstGraph: DiffGraphBuilder): Unit = {
cpg.configFile("bom.json").content.foreach { cdxData =>
cpg.configFile(BOM_JSON_FILE).content.foreach { cdxData =>
val cdxJson = parse(cdxData).getOrElse(Json.Null)
val cursor: HCursor = cdxJson.hcursor
val components = cursor.downField("components").focus.flatMap(_.asArray).getOrElse(Vector.empty)
components.foreach { comp =>
val compPurl = comp.hcursor.downField("purl").as[String].getOrElse("")
val PURL_TYPE = "purl"
val compPurl = comp.hcursor.downField(PURL_TYPE).as[String].getOrElse("")
val compType = comp.hcursor.downField("type").as[String].getOrElse("")
val compDescription: String = comp.hcursor.downField("description").as[String].getOrElse("")
val descTags = keywords.filter(compDescription.toLowerCase().contains(_))
val descTags = keywords.filter(k => compDescription.toLowerCase().contains(" " + k))
val properties = comp.hcursor.downField("properties").focus.flatMap(_.asArray).getOrElse(Vector.empty)
properties.foreach { ns =>
val nsstr = ns.hcursor.downField("value").as[String].getOrElse("")
nsstr.split("\n").foreach { (pkg: String) =>
nsstr.split("\n").filterNot(_.contains("test")).filterNot(_.contains("mock")).foreach { (pkg: String) =>
val bpkg = pkg.takeWhile(_ != '$')
cpg.call.typeFullNameExact(bpkg).newTagNodePair("purl", compPurl).store()(dstGraph)
cpg.method.parameter.typeFullNameExact(bpkg).newTagNodePair("purl", compPurl).store()(dstGraph)
if (!bpkg.contains("[") && !bpkg.contains("*"))
cpg.method.fullName(s"${Pattern.quote(bpkg)}.*").newTagNodePair("purl", compPurl).store()(dstGraph)
cpg.call.typeFullNameExact(bpkg).newTagNodePair(PURL_TYPE, compPurl).store()(dstGraph)
cpg.method.parameter.typeFullNameExact(bpkg).newTagNodePair(PURL_TYPE, compPurl).store()(dstGraph)
if (!containsRegex(bpkg))
cpg.method.fullName(s"${Pattern.quote(bpkg)}.*").newTagNodePair(PURL_TYPE, compPurl).store()(dstGraph)
if (compType != "library") {
cpg.call.typeFullNameExact(bpkg).newTagNode(compType).store()(dstGraph)
cpg.call.typeFullNameExact(bpkg).receiver.newTagNode(s"$compType-value").store()(dstGraph)
cpg.method.parameter.typeFullNameExact(bpkg).newTagNode(compType).store()(dstGraph)
if (!bpkg.contains("[") && !bpkg.contains("*"))
if (!containsRegex(bpkg))
cpg.method.fullName(s"${Pattern.quote(bpkg)}.*").newTagNode(compType).store()(dstGraph)
}
if (compType == "framework") {
def frameworkAnnotatedMethod = cpg.annotation
.fullNameExact(bpkg)
.method
frameworkAnnotatedMethod.parameter
.newTagNode(s"$compType-input")
.store()(dstGraph)
cpg.ret
.where(_.method.annotation.fullNameExact(bpkg))
.newTagNode(s"$compType-output")
.store()(dstGraph)
}
descTags.foreach { t =>
cpg.call.typeFullNameExact(bpkg).newTagNode(t).store()(dstGraph)
cpg.method.parameter.typeFullNameExact(bpkg).newTagNode(t).store()(dstGraph)
if (!bpkg.contains("[") && !bpkg.contains("*"))
if (!containsRegex(bpkg))
cpg.method.fullName(s"${Pattern.quote(bpkg)}.*").newTagNode(t).store()(dstGraph)
}
}
Expand Down
Loading