Skip to content

Commit

Permalink
Include imported symbols in parsedeps (#87)
Browse files Browse the repository at this point in the history
* Include imported symbols in parsedeps

Signed-off-by: Prabhu Subramanian <prabhu@appthreat.com>

* python + django

Signed-off-by: Prabhu Subramanian <prabhu@appthreat.com>

* python routes

Signed-off-by: Prabhu Subramanian <prabhu@appthreat.com>

---------

Signed-off-by: Prabhu Subramanian <prabhu@appthreat.com>
  • Loading branch information
prabhu authored Oct 25, 2023
1 parent 44d58f0 commit 1b0a8af
Show file tree
Hide file tree
Showing 14 changed files with 133 additions and 38 deletions.
13 changes: 13 additions & 0 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ jobs:
with:
repository: 'HooliCorp/DjanGoat'
path: 'repotests/DjanGoat'
- uses: actions/checkout@v3
with:
repository: 'DefectDojo/django-DefectDojo'
path: 'repotests/django-DefectDojo'
- uses: coursier/cache-action@v6
- name: Set up JDK
uses: actions/setup-java@v3
Expand All @@ -41,6 +45,7 @@ jobs:
./atom.sh -o /tmp/juice.atom -l js $GITHUB_WORKSPACE/repotests/juice-shop -Dlog4j.configurationFile=log4j2.xml
./atom.sh -o /tmp/ts.atom -l js $GITHUB_WORKSPACE/repotests/shiftleft-ts-example -Dlog4j.configurationFile=log4j2.xml
./atom.sh -o /tmp/py.atom -l python $GITHUB_WORKSPACE/repotests/DjanGoat -Dlog4j.configurationFile=log4j2.xml
./atom.sh -o /tmp/py2.atom -l python $GITHUB_WORKSPACE/repotests/django-DefectDojo -Dlog4j.configurationFile=log4j2.xml
./atom.sh -o /tmp/c.atom -l c $GITHUB_WORKSPACE/repotests/libexpat -Dlog4j.configurationFile=log4j2.xml
./atom.sh data-flow -o /tmp/java2.atom -l java $GITHUB_WORKSPACE/repotests/shiftleft-java-example -Dlog4j.configurationFile=log4j2.xml --slice-outfile /tmp/java.slices.json
Expand All @@ -54,7 +59,15 @@ jobs:
./atom.sh usages -o /tmp/juice3.atom -l js $GITHUB_WORKSPACE/repotests/juice-shop -Dlog4j.configurationFile=log4j2.xml --slice-outfile /tmp/juice.usages.json
./atom.sh usages -o /tmp/ts3.atom -l js $GITHUB_WORKSPACE/repotests/shiftleft-ts-example -Dlog4j.configurationFile=log4j2.xml --slice-outfile /tmp/ts.usages.json
./atom.sh usages -o /tmp/py3.atom -l python $GITHUB_WORKSPACE/repotests/DjanGoat -Dlog4j.configurationFile=log4j2.xml --slice-outfile /tmp/py.usages.json
./atom.sh usages -o /tmp/py4.atom -l python $GITHUB_WORKSPACE/repotests/django-DefectDojo -Dlog4j.configurationFile=log4j2.xml --slice-outfile /tmp/py4.usages.json
./atom.sh usages -o /tmp/c3.atom -l c $GITHUB_WORKSPACE/repotests/libexpat -Dlog4j.configurationFile=log4j2.xml --slice-outfile /tmp/c.usages.json
ls -lh /tmp/*.atom /tmp/*.json
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- run: |
npm install -g @cyclonedx/cdxgen --omit=optional
cdxgen -t python --deep -o $GITHUB_WORKSPACE/repotests/django-DefectDojo/bom.json $GITHUB_WORKSPACE/repotests/django-DefectDojo
./atom.sh reachables -o /tmp/django-DefectDojo.atom -l python $GITHUB_WORKSPACE/repotests/django-DefectDojo -Dlog4j.configurationFile=log4j2.xml --slice-outfile /tmp/django-DefectDojo.reachables.json
env:
JAVA_TOOL_OPTIONS: "-Dfile.encoding=UTF-8"
if: runner.os != 'Windows'
9 changes: 9 additions & 0 deletions .github/workflows/repotests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ jobs:
with:
repository: 'HooliCorp/DjanGoat'
path: 'repotests/DjanGoat'
- uses: actions/checkout@v3
with:
repository: 'DefectDojo/django-DefectDojo'
path: 'repotests/django-DefectDojo'
- uses: coursier/cache-action@v6
- name: Set up JDK
uses: actions/setup-java@v3
Expand All @@ -57,6 +61,7 @@ jobs:
./atom.sh -o /tmp/ts.atom -l js $GITHUB_WORKSPACE/repotests/shiftleft-ts-example -Dlog4j.configurationFile=log4j2.xml
./atom.sh -o /tmp/py.atom -l python $GITHUB_WORKSPACE/repotests/DjanGoat -Dlog4j.configurationFile=log4j2.xml
./atom.sh parsedeps -o /tmp/py.atom -l python $GITHUB_WORKSPACE/repotests/DjanGoat -Dlog4j.configurationFile=log4j2.xml
./atom.sh -o /tmp/py2.atom -l python $GITHUB_WORKSPACE/repotests/django-DefectDojo -Dlog4j.configurationFile=log4j2.xml
./atom.sh -o /tmp/c.atom -l c $GITHUB_WORKSPACE/repotests/libexpat -Dlog4j.configurationFile=log4j2.xml
./atom.sh data-flow -o /tmp/java2.atom -l java $GITHUB_WORKSPACE/repotests/shiftleft-java-example -Dlog4j.configurationFile=log4j2.xml --slice-outfile /tmp/java.slices.json
Expand All @@ -69,6 +74,7 @@ jobs:
# ./atom.sh usages -o /tmp/juice3.atom -l js $GITHUB_WORKSPACE/repotests/juice-shop -Dlog4j.configurationFile=log4j2.xml --slice-outfile /tmp/juice.usages.json
./atom.sh usages -o /tmp/ts3.atom -l js $GITHUB_WORKSPACE/repotests/shiftleft-ts-example -Dlog4j.configurationFile=log4j2.xml --slice-outfile /tmp/ts.usages.json
./atom.sh usages -o /tmp/py3.atom -l python $GITHUB_WORKSPACE/repotests/DjanGoat -Dlog4j.configurationFile=log4j2.xml --slice-outfile /tmp/py.usages.json
./atom.sh usages -o /tmp/py4.atom -l python $GITHUB_WORKSPACE/repotests/django-DefectDojo -Dlog4j.configurationFile=log4j2.xml --slice-outfile /tmp/py4.usages.json
./atom.sh usages -o /tmp/c3.atom -l c $GITHUB_WORKSPACE/repotests/libexpat -Dlog4j.configurationFile=log4j2.xml --slice-outfile /tmp/c.usages.json
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
Expand All @@ -77,6 +83,9 @@ jobs:
npm install -g @cyclonedx/cdxgen --omit=optional
cdxgen -t java --deep -o $GITHUB_WORKSPACE/repotests/java-sec-code/bom.json $GITHUB_WORKSPACE/repotests/java-sec-code
./atom.sh reachables -o /tmp/java-sec-code.atom -l java $GITHUB_WORKSPACE/repotests/java-sec-code -Dlog4j.configurationFile=log4j2.xml --slice-outfile /tmp/java-sec-code.reachables.json
cdxgen -t python --deep -o $GITHUB_WORKSPACE/repotests/django-DefectDojo/bom.json $GITHUB_WORKSPACE/repotests/django-DefectDojo
./atom.sh reachables -o /tmp/django-DefectDojo.atom -l python $GITHUB_WORKSPACE/repotests/django-DefectDojo -Dlog4j.configurationFile=log4j2.xml --slice-outfile /tmp/django-DefectDojo.reachables.json
if: runner.os != 'Windows'
env:
JAVA_TOOL_OPTIONS: "-Dfile.encoding=UTF-8"
- run: |
Expand Down
4 changes: 2 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name := "atom"
ThisBuild / organization := "io.appthreat"
ThisBuild / version := "1.5.2"
ThisBuild / version := "1.5.3"
ThisBuild / scalaVersion := "3.3.1"

val chenVersion = "0.0.20"
val chenVersion = "0.5.2"

lazy val atom = Projects.atom

Expand Down
2 changes: 1 addition & 1 deletion lib/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
org.eclipse.cdt jars were downloaded from

https://download.eclipse.org/tools/cdt/releases/11.2/cdt-11.2.0/plugins/
https://download.eclipse.org/tools/cdt/releases/11.3/cdt-11.3.1/plugins/
Binary file not shown.
1 change: 1 addition & 0 deletions src/main/scala/io/appthreat/atom/Atom.scala
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ object Atom {
new DataFlowSlicing().calculateDataFlowSlice(cpg, dataFlowConfig.asInstanceOf[DataFlowConfig])
case x: AtomUsagesConfig =>
println("Slicing the atom for usages. This might take a few minutes ...")
new ChennaiTagsPass(cpg).createAndApply()
val usagesConfig = migrateAtomConfigToSliceConfig(x)
Option(UsageSlicing.calculateUsageSlice(cpg, usagesConfig.asInstanceOf[UsagesConfig]))
case x: AtomReachablesConfig =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ object PythonDependencyParser extends XDependencyParser {
.filterNot(_ == "N/A")
.map(x => ScalaFile(x))
.l
val parentList = fileList.flatMap(_.parentOption.map(_.pathAsString))
cpg.imports
.whereNot(_.call.file.name(".*setup.py"))
.filterNot {
Expand All @@ -92,8 +91,7 @@ object PythonDependencyParser extends XDependencyParser {
}
.dedup
.importedEntity
.flatMap(_.split('.').headOption)
.map(x => ModuleWithVersion(x))
.map(x => ModuleWithVersion(name = x.split('.').head, importedSymbols = x))
.toSet
}

Expand Down
23 changes: 18 additions & 5 deletions src/main/scala/io/appthreat/atom/parsedeps/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -36,22 +36,35 @@ package object parsedeps {
}

implicit val moduleWithVersionEncoder: Encoder[ModuleWithVersion] =
Encoder.forProduct3("name", "version", "versionSpecifiers")(x => (x.name, x.version, x.versionSpecifiers))
Encoder.forProduct4("name", "version", "versionSpecifiers", "importedSymbols")(x =>
(x.name, x.version, x.versionSpecifiers, x.importedSymbols)
)
implicit val moduleWithVersionDecoder: Decoder[ModuleWithVersion] =
Decoder.forProduct3("name", "version", "versionSpecifiers")(ModuleWithVersion.apply)
Decoder.forProduct4("name", "version", "versionSpecifiers", "importedSymbols")(ModuleWithVersion.apply)

case class DependencySlice(modules: Seq[ModuleWithVersion]) extends AtomSlice {
override def toJson: String = this.asJson.spaces2
}

case class ModuleWithVersion(name: String, version: String = "", versionSpecifiers: String = "") {
case class ModuleWithVersion(
name: String,
version: String = "",
versionSpecifiers: String = "",
importedSymbols: String = ""
) {

def merge(x: ModuleWithVersion): ModuleWithVersion = {
val vs = this.versions ++ x.versions
val is = this.importedSymbols + "," + x.importedSymbols
vs.find(_.startsWith("==")) match
case Some(exactVersion) =>
ModuleWithVersion(name, exactVersion.stripPrefix("=="), (vs diff Set(exactVersion)).mkString(","))
case None => ModuleWithVersion(name, versionSpecifiers = vs.mkString(","))
ModuleWithVersion(
name,
exactVersion.stripPrefix("=="),
(vs diff Set(exactVersion)).mkString(","),
importedSymbols = is
)
case None => ModuleWithVersion(name, versionSpecifiers = vs.mkString(","), importedSymbols = is)

}

Expand Down
20 changes: 11 additions & 9 deletions src/main/scala/io/appthreat/atom/slicing/ReachableSlicing.scala
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,23 @@ object ReachableSlicing {
.toList
flowsList ++=
atom.tag.name(API_TAG).parameter.reachableByFlows(atom.tag.name(API_TAG).parameter).map(toSlice).toList
// For JavaScript, we need flows between arguments of call nodes to track callbacks and middlewares
if (language == Languages.JSSRC || language == Languages.JAVASCRIPT) {
def jsCallSource = atom.tag.name(config.sourceTag).call.argument.isIdentifier
def jsFrameworkIdentifier = atom.tag.name(FRAMEWORK_TAG).identifier
def jsFrameworkParameter = atom.tag.name(FRAMEWORK_TAG).parameter
def jsSink = atom.tag.name(config.sinkTag).call.argument.isIdentifier
flowsList ++= jsSink
.reachableByFlows(jsCallSource, jsFrameworkIdentifier, jsFrameworkParameter)
// For JavaScript and Python, we need flows between arguments of call nodes to track callbacks and middlewares
if (
language == Languages.JSSRC || language == Languages.JAVASCRIPT || language == Languages.PYTHON || language == Languages.PYTHONSRC
) {
def dynCallSource = atom.tag.name(config.sourceTag).call.argument.isIdentifier
def dynFrameworkIdentifier = atom.tag.name(FRAMEWORK_TAG).identifier
def dynFrameworkParameter = atom.tag.name(FRAMEWORK_TAG).parameter
def dynSink = atom.tag.name(config.sinkTag).call.argument.isIdentifier
flowsList ++= dynSink
.reachableByFlows(dynCallSource, dynFrameworkIdentifier, dynFrameworkParameter)
.map(toSlice)
.toList
flowsList ++= atom.tag
.name(FRAMEWORK_TAG)
.call
.argument
.reachableByFlows(jsFrameworkParameter)
.reachableByFlows(dynFrameworkParameter)
.map(toSlice)
.toList
}
Expand Down
56 changes: 56 additions & 0 deletions src/main/scala/io/appthreat/atom/slicing/UsageSlicing.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.*
import io.shiftleft.codepropertygraph.generated.{Languages, Operators, PropertyNames}
import io.shiftleft.semanticcpg.language.*
import overflowdb.PropertyKey

import java.util.concurrent.*
import java.util.concurrent.atomic.AtomicBoolean
Expand All @@ -21,6 +22,7 @@ object UsageSlicing {
val exec: ExecutorService = Executors.newWorkStealingPool(Runtime.getRuntime.availableProcessors() / 2)
private val constructorTypeMatcher = Pattern.compile(".*new (\\w+)\\(.*")
private val excludeOperatorCalls = new AtomicBoolean(true)
private val FRAMEWORK_ROUTE = "framework-route"

/** Generates object slices from the given CPG.
*
Expand All @@ -44,6 +46,8 @@ object UsageSlicing {
val userDefTypes = userDefinedTypes(atom)
if (language.get == Languages.NEWC || language.get == Languages.C)
ProgramUsageSlice(slices ++ importsAsSlices(atom), userDefTypes)
else if (language.get == Languages.PYTHON || language.get == Languages.PYTHONSRC)
ProgramUsageSlice(slices, userDefTypes ++ routesAsUDT(atom))
else
ProgramUsageSlice(slices, userDefTypes)
}
Expand Down Expand Up @@ -101,6 +105,58 @@ object UsageSlicing {
})
}

/** Builds user defined type entries for calls that carry a framework route.
  *
  * A call is selected when at least one of its arguments has a tag named exactly `FRAMEWORK_ROUTE`. For every
  * selected call, its literal arguments are recorded as fields and the methods obtained through `callee(NoResolve)`
  * (skipping names that start with `<clinit>`) are recorded as procedures. Calls that produce neither fields nor
  * procedures are left out of the result.
  *
  * @param atom
  *   the CPG whose calls are inspected.
  * @return
  *   one user defined type per selected call, named after the call.
  */
def routesAsUDT(atom: Cpg): List[UserDefinedType] = {

  // Literal arguments of the call, each captured with its code, type and source position.
  def literalFields(call: Call): List[LocalDef] = {
    val lineKey   = new PropertyKey[Integer](PropertyNames.LINE_NUMBER)
    val columnKey = new PropertyKey[Integer](PropertyNames.COLUMN_NUMBER)
    call.argument.isLiteral.map { literal =>
      LocalDef(
        name = literal.code,
        typeFullName = literal.typeFullName,
        // property() may hand back null when the position is absent, hence the Option wrapper
        lineNumber = Option(literal.property(lineKey)).map(_.toInt),
        columnNumber = Option(literal.property(columnKey)).map(_.toInt)
      )
    }.l
  }

  // Methods the call may dispatch to, described as observed calls.
  def calleeProcedures(call: Call): List[ObservedCall] =
    call
      .callee(NoResolve)
      .method
      .filter(target => !target.name.startsWith("<clinit>"))
      .map { target =>
        ObservedCall(
          target.name,
          Option(target.fullName),
          target.parameter.map(_.typeFullName).toList,
          target.methodReturn.typeFullName,
          Option(target.isExternal),
          target.lineNumber.map(_.intValue()),
          target.columnNumber.map(_.intValue())
        )
      }
      .l

  val routeCalls = atom.call.where(_.argument.tag.nameExact(FRAMEWORK_ROUTE))

  routeCalls
    .map { routeCall =>
      UserDefinedType(
        routeCall.name,
        literalFields(routeCall),
        calleeProcedures(routeCall),
        routeCall.location.filename,
        routeCall.lineNumber.map(_.intValue()),
        routeCall.columnNumber.map(_.intValue())
      )
    }
    // keep only entries that carry at least one field or one procedure
    .filterNot(udt => udt.fields.isEmpty && udt.procedures.isEmpty)
    .l
}

private def TimedGet(dsf: Future[Option[(Method, ObjectUsageSlice)]]) = {
try {
dsf.get(5, TimeUnit.SECONDS)
Expand Down
26 changes: 13 additions & 13 deletions src/test/scala/io/appthreat/atom/PythonDependencyScannerTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -197,19 +197,19 @@ class PythonDependencyScannerTests extends PySrc2CpgFixture(withOssDataflow = fa
"have the modules scanned successfully" in {
val scanResult = PythonDependencyParser.parse(cpg)
scanResult.modules shouldBe List(
ModuleWithVersion("PackageC", "1.2.0.dev1+hg.5.b11e5e6f0b0b"),
ModuleWithVersion("PickyThing", "2.4c1", "<1.6,>1.9,!=1.9.6,<2.0a0"),
ModuleWithVersion("certifi", "", ">=2017.4.17"),
ModuleWithVersion("charset_normalizer", "", ">=2,<4"),
ModuleWithVersion("idna", "", ">=2.5,<4"),
ModuleWithVersion("os", "", ""),
ModuleWithVersion("packageA", "", ">=1.4.2,<1.9,!=1.5.*,!=1.6.*"),
ModuleWithVersion("packageB", "", ">=0.5.0,< 0.7.0"),
ModuleWithVersion("re-wx", "", ">=0.0.2"),
ModuleWithVersion("socket", "", ""),
ModuleWithVersion("typing-extensions", "3.10.0.2", ""),
ModuleWithVersion("urllib3", "", ">=1.21.1,<3"),
ModuleWithVersion("zope.interface", "", ">=5.1.0")
ModuleWithVersion("PackageC", "1.2.0.dev1+hg.5.b11e5e6f0b0b", ""),
ModuleWithVersion("PickyThing", "2.4c1", "<1.6,>1.9,!=1.9.6,<2.0a0", ""),
ModuleWithVersion("certifi", "", ">=2017.4.17", ""),
ModuleWithVersion("charset_normalizer", "", ">=2,<4", ""),
ModuleWithVersion("idna", "", ">=2.5,<4", ""),
ModuleWithVersion("os", "", "", "os.path"),
ModuleWithVersion("packageA", "", ">=1.4.2,<1.9,!=1.5.*,!=1.6.*", ""),
ModuleWithVersion("packageB", "", ">=0.5.0,< 0.7.0", ""),
ModuleWithVersion("re-wx", "", ">=0.0.2", ""),
ModuleWithVersion("socket", "", "", "socket"),
ModuleWithVersion("typing-extensions", "3.10.0.2", "", ""),
ModuleWithVersion("urllib3", "", ">=1.21.1,<3", "urllib3.poolmanager.proxy_from_url,urllib3.util.Timeout,urllib3.exceptions.LocationValueError,urllib3.contrib.socks.SOCKSProxyManager,urllib3.exceptions.HTTPError,urllib3.exceptions.SSLError,urllib3.exceptions.ProxyError,urllib3.exceptions.InvalidHeader,urllib3.exceptions.MaxRetryError,urllib3.exceptions.ConnectTimeoutError,urllib3.exceptions.ClosedPoolError,urllib3.exceptions.ProtocolError,urllib3.util.retry.Retry,urllib3.exceptions.ResponseError,,urllib3.exceptions.ReadTimeoutError,urllib3.exceptions.NewConnectionError,urllib3.util.parse_url,urllib3.poolmanager.PoolManager"),
ModuleWithVersion("zope.interface", "", ">=5.1.0", "")
)
}
}
Expand Down
7 changes: 5 additions & 2 deletions wrapper/nodejs/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import { freemem, platform as _platform } from "node:os";
import { dirname, join, delimiter } from "node:path";
import { readFileSync } from "node:fs";

import { spawnSync } from "node:child_process";
import { fileURLToPath } from "node:url";
import { detectJava } from "./utils.mjs";
Expand All @@ -12,7 +14,8 @@ if (!url.startsWith("file://")) {
url = new URL(`file://${import.meta.url}`).toString();
}
const dirName = import.meta ? dirname(fileURLToPath(url)) : __dirname;

const selfPJson = JSON.parse(readFileSync(join(dirName, "package.json")));
const _version = selfPJson.version;
export const LOG4J_CONFIG = join(dirName, "plugins", "log4j2.xml");
export const ATOM_HOME = join(dirName, "plugins");
export const APP_LIB_DIR = join(ATOM_HOME, "lib");
Expand All @@ -22,7 +25,7 @@ export const JAVA_OPTS = `${
process.env.JAVA_OPTS || ""
} -Xmx${freeMemoryGB}G ${JVM_ARGS}`;
export const APP_MAIN_CLASS = "io.appthreat.atom.Atom";
export const ATOM_VERSION = "1.5.2";
export const ATOM_VERSION = _version;
export const APP_CLASSPATH = join(
APP_LIB_DIR,
`io.appthreat.atom-${ATOM_VERSION}-classpath.jar`
Expand Down
4 changes: 2 additions & 2 deletions wrapper/nodejs/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion wrapper/nodejs/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@appthreat/atom",
"version": "1.5.2",
"version": "1.5.3",
"description": "Create atom (⚛) representation for your application, packages and libraries",
"exports": "./index.js",
"type": "module",
Expand Down

0 comments on commit 1b0a8af

Please sign in to comment.