Skip to content

Commit

Permalink
Merge pull request #1235 from Privado-Inc/dev
Browse files Browse the repository at this point in the history
Release PR
  • Loading branch information
pandurangpatil authored Aug 5, 2024
2 parents ec46ab5 + 67db0c2 commit af3a8e5
Show file tree
Hide file tree
Showing 15 changed files with 639 additions and 3 deletions.
3 changes: 2 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ ThisBuild / version := sys.env.getOrElse("BUILD_VERSION", "dev-SNAPSHOT")
// parsed by project/Versions.scala, updated by updateDependencies.sh

val cpgVersion = "0.1.6"
val joernVersion = "0.1.16"
val joernVersion = "0.1.18"
val overflowdbVersion = "0.1.5"
val requests = "0.8.0"
val upickle = "3.1.2"
Expand Down Expand Up @@ -38,6 +38,7 @@ libraryDependencies ++= Seq(
"io.joern" %% "kotlin2cpg" % Versions.joern exclude ("com.squareup.okhttp3", "okhttp") exclude ("com.squareup.okio", "okio"),
"io.joern" %% "gosrc2cpg" % Versions.joern,
"io.joern" %% "csharpsrc2cpg" % Versions.joern,
"io.joern" %% "c2cpg" % Versions.joern,
"io.joern" %% "joern-cli" % Versions.joern exclude ("io.undertow", "undertow-core"),
"io.joern" %% "semanticcpg" % Versions.joern,
"io.joern" %% "semanticcpg" % Versions.joern % Test classifier "tests",
Expand Down
10 changes: 9 additions & 1 deletion src/main/scala/ai/privado/entrypoint/CommandParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ case class PrivadoInput(
threadDumpAvgCPULimit: Int = DEFAULT_THREAD_DUMP_AVG_CPU_LIMIT,
rubyParserTimeout: Long = 120,
excludeFileRegex: String = "",
extensionsForPhp: String = ""
extensionsForPhp: String = "",
isSkipHeaderFileContext: Boolean = false
)

object CommandConstants {
Expand Down Expand Up @@ -139,6 +140,8 @@ object CommandConstants {
val EXCLUDE_FILE_REGEX_ABBR = "efr"
val EXTENSIONS_FOR_PHP = "extensions-for-php"
val EXTENSIONS_FOR_PHP_ABBR = "exphp"
val IS_SKIP_HEADER_FILE_CONTEXT = "skip-header-file-context"
val IS_SKIP_HEADER_FILE_CONTEXT_ABBR = "shfc"
}

object CommandParser {
Expand Down Expand Up @@ -335,6 +338,11 @@ object CommandParser {
.text(
"File extensions that are considered valid for PHP scanner. Eg \".php,.cls,.ent\" Default is \".php\""
),
opt[Unit](CommandConstants.IS_SKIP_HEADER_FILE_CONTEXT)
.abbr(CommandConstants.IS_SKIP_HEADER_FILE_CONTEXT_ABBR)
.optional()
.action((x, c) => c.copy(isSkipHeaderFileContext = true))
.text("Skip header file context when processing code file"),
arg[String]("<Source directory>")
.required()
.action((x, c) => c.copy(sourceLocation = c.sourceLocation + x))
Expand Down
19 changes: 18 additions & 1 deletion src/main/scala/ai/privado/entrypoint/ScanProcessor.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ package ai.privado.entrypoint

import ai.privado.cache.*
import ai.privado.entrypoint.ScanProcessor.statsRecorder
import ai.privado.languageEngine.c.processor.CProcessor
import ai.privado.languageEngine.csharp.processor.CSharpProcessor
import ai.privado.languageEngine.default.processor.DefaultProcessor
import ai.privado.languageEngine.go.processor.GoProcessor
Expand All @@ -38,7 +39,7 @@ import ai.privado.metric.MetricHandler
import ai.privado.model.*
import ai.privado.model.Language.{Language, UNKNOWN}
import ai.privado.rulevalidator.YamlFileValidator
import ai.privado.utility.Utilities.{isValidRule, isValidDEDRule}
import ai.privado.utility.Utilities.{isValidDEDRule, isValidRule}
import ai.privado.utility.StatsRecorder
import better.files.File
import io.circe.Json
Expand Down Expand Up @@ -416,6 +417,7 @@ object ScanProcessor extends CommandProcessor {
Language.withJoernLangName(langDect)
} else {
statsRecorder.justLogMessage("Language forced ...")
statsRecorder.endLastStage()
config.forceLanguage
}
MetricHandler.metricsData("language") = Json.fromString(languageDetected.toString)
Expand Down Expand Up @@ -553,6 +555,21 @@ object ScanProcessor extends CommandProcessor {
propertyFilterCache = propertyFilterCache
)
.processCpg()
case Language.C =>
statsRecorder.justLogMessage("Detected language 'C'")
CProcessor(
getProcessedRule(Set(Language.C), appCache),
this.config,
sourceRepoLocation,
dataFlowCache = getDataflowCache,
auditCache,
s3DatabaseDetailsCache,
appCache,
statsRecorder = statsRecorder,
databaseDetailsCache = databaseDetailsCache,
propertyFilterCache = propertyFilterCache
)
.processCpg()
case _ =>
processCpgWithDefaultProcessor(sourceRepoLocation, appCache, statsRecorder)
} match {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package ai.privado.languageEngine.c.passes

import io.joern.x2cpg.passes.frontend.XImportResolverPass
import io.shiftleft.codepropertygraph.generated.Cpg
import io.shiftleft.semanticcpg.language.importresolver.*
import io.shiftleft.codepropertygraph.generated.nodes.{Call, Import}
import io.shiftleft.semanticcpg.language.*

/** Import resolver for C/C++ sources.
  *
  * For every [[Import]] node that carries both an `importedAs` and an `importedEntity`, the pass guesses the path of
  * the included header, looks that file up in the CPG by exact name, and tags the import node with one tag pair per
  * method and type declaration found in the header.
  */
class CImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) {

  /** Resolves a single import node. Imports lacking `importedAs` or `importedEntity` are skipped. */
  override def runOnPart(builder: DiffGraphBuilder, part: Import): Unit = for {
    importedAs     <- part.importedAs
    importedEntity <- part.importedEntity
  } {
    // File name of the including file, made relative to the code root; "<unknown>" when the import has no file.
    val fileName = part.file.name.headOption.getOrElse("<unknown>").stripPrefix(codeRootDir)
    optionalResolveImportNode(fileName, part, importedEntity, importedAs, builder)
  }

  /** Deliberately a no-op: this pass resolves [[Import]] nodes directly in [[runOnPart]] via
    * `optionalResolveImportNode`, so the call-based hook has nothing to do.
    */
  override protected def optionalResolveImport(
    fileName: String,
    importCall: Call,
    importedEntity: String,
    importedAs: String,
    diffGraph: DiffGraphBuilder
  ): Unit = ()

  /** Computes the candidate header path for an import and tags the import node with everything that header defines.
    *
    * The candidate path is built by dropping as many trailing segments from the including file's path as the
    * include string has segments, and then appending the include string. For `src/a/b.c` this maps `foo.h` to
    * `src/a/foo.h` and `sub/foo.h` to `src/sub/foo.h`.
    *
    * NOTE(review): this looks like a heuristic for include paths written relative to a common include root rather
    * than relative to the including file; confirm this is the intended resolution rule.
    *
    * @param fileName
    *   path of the file containing the import, relative to the code root
    * @param importNode
    *   the import node to tag
    * @param importedEntity
    *   the include string, e.g. `sub/foo.h`
    * @param importedAs
    *   alias of the import (currently unused)
    * @param diffGraph
    *   builder that receives the new tag nodes
    */
  private def optionalResolveImportNode(
    fileName: String,
    importNode: Import,
    importedEntity: String,
    importedAs: String,
    diffGraph: DiffGraphBuilder
  ): Unit = {
    val includeDepth   = importedEntity.split("/").length
    val baseDirParts   = fileName.split("/").dropRight(includeDepth)
    val baseDir        = if (baseDirParts.isEmpty) "" else baseDirParts.mkString("/") + "/"
    val headerFileName = baseDir + importedEntity

    resolveEntity(headerFileName).foreach(evaluatedImportNodeToTag(_, importNode, diffGraph))
  }

  /** Collects every method and type declaration defined in the file named exactly `headerFileName`.
    *
    * @return
    *   the resolved methods and type declarations; empty when no such file exists in the CPG
    */
  private def resolveEntity(headerFileName: String): Set[EvaluatedImport] = {
    val methods = cpg.file.nameExact(headerFileName).method.map(m => ResolvedMethod(m.fullName, m.name)).l

    val typeDecls = cpg.file
      .nameExact(headerFileName)
      .typeDecl
      .map(typeDecl => ResolvedTypeDecl(typeDecl.fullName))
      .l

    (typeDecls ++ methods).collectAll[EvaluatedImport].toSet
  }

  /** Stores one evaluated import on the import node as a (label, serialized value) tag pair. */
  private def evaluatedImportNodeToTag(x: EvaluatedImport, importNode: Import, diffGraph: DiffGraphBuilder): Unit =
    importNode.start.newTagNodePair(x.label, x.serialize).store()(diffGraph)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package ai.privado.languageEngine.c.passes

import io.joern.x2cpg.passes.frontend.XTypeHintCallLinker
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.Call
import io.shiftleft.semanticcpg.language.*

/** Type-hint based call linker for C/C++ code. */
class CTypeHintCallLinker(cpg: Cpg) extends XTypeHintCallLinker(cpg) {

  /** Calls considered for linking: non-operator calls that have at least one callee name and either no callee at all
    * or at least one external callee.
    */
  override protected def calls: Iterator[Call] =
    cpg.call.nameNot("<operator>.*", "<operators>.*").filter { candidate =>
      // Materialised lazily so the callee lookup is skipped when there are no callee names.
      lazy val callees = candidate.callee.l
      calleeNames(candidate).nonEmpty && (callees.isEmpty || callees.exists(_.isExternal))
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
package ai.privado.languageEngine.c.passes

import io.joern.x2cpg.Defines
import io.joern.x2cpg.passes.frontend.{
CallAlias,
LocalVar,
RecoverForXCompilationUnit,
XTypeRecovery,
XTypeRecoveryConfig,
XTypeRecoveryPassGenerator,
XTypeRecoveryState
}
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.*
import overflowdb.traversal.Traversal
import io.shiftleft.semanticcpg.language.*
import overflowdb.BatchedUpdate.DiffGraphBuilder
import io.shiftleft.semanticcpg.language.importresolver.*
import io.shiftleft.semanticcpg.language.operatorextension.OpNodes
import io.shiftleft.semanticcpg.language.operatorextension.OpNodes.{Assignment, FieldAccess}
import io.joern.x2cpg.passes.frontend.XTypeRecovery.AllNodeTypesFromIteratorExt
import io.joern.x2cpg.passes.frontend.XTypeRecovery.AllNodeTypesFromNodeExt
import io.shiftleft.codepropertygraph.generated.{Operators, PropertyNames}

/** Generator of the per-iteration type recovery passes for C/C++ code, with [[File]] as the compilation unit.
  *
  * @param cpg
  *   the graph to run type recovery on
  * @param config
  *   type recovery configuration; defaults to `XTypeRecoveryConfig()`
  */
class CTypeRecoveryPassGenerator(cpg: Cpg, config: XTypeRecoveryConfig = XTypeRecoveryConfig())
    extends XTypeRecoveryPassGenerator[File](cpg, config) {

  /** Builds the [[CTypeRecovery]] pass for the given state and iteration number. */
  override protected def generateRecoveryPass(state: XTypeRecoveryState, iteration: Int): XTypeRecovery[File] = {
    val recoveryPass = new CTypeRecovery(cpg, state, iteration)
    recoveryPass
  }
}

/** One iteration of C/C++ type recovery; every file node of the CPG is handled as its own compilation unit. */
private class CTypeRecovery(cpg: Cpg, state: XTypeRecoveryState, iteration: Int)
    extends XTypeRecovery[File](cpg, state, iteration) {

  /** All file nodes of the graph. */
  override def compilationUnits: Traversal[File] =
    cpg.file.iterator

  /** Creates the recovery task for one file, sharing this pass's graph and state. */
  override def generateRecoveryForCompilationUnitTask(
    unit: File,
    builder: DiffGraphBuilder
  ): RecoverForXCompilationUnit[File] =
    new RecoverForCFile(cpg, unit, builder, state)
}

/** Type recovery task for a single C/C++ file.
  *
  * Seeds the symbol table from the tags stored on the file's import nodes, derives call full names from the type of
  * the call's first argument, and finally writes the recovered types back onto the nodes of the file.
  *
  * @param cpg
  *   the graph being analysed
  * @param cu
  *   the file this task is responsible for
  * @param builder
  *   diff graph that collects the resulting changes
  * @param state
  *   shared type recovery state of the current run
  */
private class RecoverForCFile(cpg: Cpg, cu: File, builder: DiffGraphBuilder, state: XTypeRecoveryState)
    extends RecoverForXCompilationUnit[File](cpg, cu, builder, state) {

  /** A heuristic method to determine if a call is a constructor or not.
    */
  override protected def isConstructor(c: Call): Boolean =
    isConstructor(c.name)

  /** A heuristic method to determine if a call name is a constructor or not: only `new` and `<init>` qualify.
    */
  override protected def isConstructor(name: String): Boolean =
    name == "new" || name == "<init>"

  /** Import nodes that belong to this file.
    *
    * The file name is compared with `nameExact`, not with the regex-based `name` step: file paths routinely contain
    * regex meta characters (`.`, `+`, `(`, `[`), which would make a path match the wrong files, fail to match itself,
    * or be rejected as an invalid pattern.
    */
  override protected def importNodes: Iterator[Import] =
    cpg.imports.where(_.file.nameExact(cu.name))

  /** Registers the aliases introduced by one import in the symbol table, based on the tags stored on the import
    * node. Imports without an `importedAs` value are skipped.
    */
  override protected def visitImport(i: Import): Unit = for {
    resolvedImport <- i.tag
    alias          <- i.importedAs
  } {
    import scala.util.Try
    // NOTE(review): when a tag cannot be decoded (the decoder throws), a placeholder method named "random" is
    // registered instead of skipping the tag. This looks like a workaround; confirm whether such tags should
    // simply be ignored.
    Try(EvaluatedImport.tagToEvaluatedImport(resolvedImport)).toOption
      .getOrElse(Option(UnknownMethod("random", "random", Option("random"))))
      .foreach {
        case ResolvedMethod(fullName, alias, receiver, _) =>
          symbolTable.append(CallAlias(alias, receiver), fullName)
        case ResolvedTypeDecl(fullName, _) =>
          symbolTable.append(LocalVar(alias), fullName)
        case ResolvedMember(basePath, memberName, _) =>
          // The member may be a local of a method or a member of a type declaration with that full name.
          val matchingIdentifiers = cpg.method.fullNameExact(basePath).local
          val matchingMembers     = cpg.typeDecl.fullNameExact(basePath).member
          val memberTypes = (matchingMembers ++ matchingIdentifiers)
            .nameExact(memberName)
            .getKnownTypes
          symbolTable.append(LocalVar(alias), memberTypes)
        case UnknownMethod(fullName, alias, receiver, _) =>
          symbolTable.append(CallAlias(alias, receiver), fullName)
        case UnknownTypeDecl(fullName, _) =>
          symbolTable.append(LocalVar(alias), fullName)
        case UnknownImport(path, _) =>
          // Nothing is known about the import, so it is registered both as a call alias and as a variable.
          symbolTable.append(CallAlias(alias), path)
          symbolTable.append(LocalVar(alias), path)
      }
  }

  /** Whether a node already carries type information.
    *
    * A non-operator call counts as typed when its `methodFullName` is neither `<unknownFullName>` nor `ANY`
    * (compared case-insensitively) and differs from the bare call name. Every other node counts as typed when it has
    * at least one known type.
    */
  override protected def hasTypes(node: AstNode): Boolean = node match {
    case x: Call if !x.methodFullName.startsWith("<operator>") =>
      !x.methodFullName.toLowerCase().matches("(<unknownfullname>|any)") && !x.methodFullName.equals(x.name)
    case x => x.getKnownTypes.nonEmpty
  }

  /** Derives candidate full names for call `c` from the types of its first argument and records them in the symbol
    * table.
    *
    * When the first argument is a (possibly indirect) field access whose own first argument is in the symbol table,
    * the types of that inner argument are used. A trailing `*` is stripped from each type before the call name is
    * appended with `pathSep`.
    *
    * @return
    *   the result of appending the candidate names to the symbol table entry of `c`
    */
  override protected def setCallMethodFullNameFromBase(c: Call): Set[String] = {
    val recTypes = c.argument.headOption
      .map {
        case ifa: Call
            if (ifa.name.equals("<operator>.indirectFieldAccess") || ifa.name.equals(
              "<operator>.fieldAccess"
            )) && ifa.argument.headOption.exists(symbolTable.contains) =>
          getTypeFromArgument(ifa.argument.headOption, c)
        case x => getTypeFromArgument(Some(x), c)
      }
      .getOrElse(Set.empty[String])
    val callTypes = recTypes.map(_.stripSuffix("*").concat(s"$pathSep${c.name}"))
    symbolTable.append(c, callTypes)
  }

  /** Return types recorded for the methods with the given full name: every declared return type other than `ANY`,
    * plus all possible types stored on the method return nodes.
    */
  private def knownReturnTypes(methodFullName: String): Set[String] = {
    val declared = cpg.method.fullNameExact(methodFullName).methodReturn.typeFullNameNot("ANY").typeFullName
    val possible = cpg.method.fullNameExact(methodFullName).methodReturn.where(_.possibleTypes).possibleTypes
    (declared ++ possible).toSet
  }

  /** Determines the types of `headArgument`, the expression acting as the base of call `c`.
    *
    *   - A call with a concrete `typeFullName` (neither `ANY` nor `<empty>`) yields exactly that type.
    *   - Any other call yields the known return types of `c`'s method, else those of the argument call's own method,
    *     else the argument's symbol table types suffixed with the dummy return type.
    *   - Every other expression yields its symbol table types.
    *
    * @return
    *   the types found; empty when `headArgument` is `None`
    */
  private def getTypeFromArgument(headArgument: Option[Expression], c: Call): Set[String] = {
    headArgument
      .map {
        case x: Call if x.typeFullName != "ANY" && x.typeFullName != "<empty>" =>
          Set(x.typeFullName)
        case x: Call =>
          val fromOuterCall = knownReturnTypes(c.methodFullName)
          if (fromOuterCall.nonEmpty) fromOuterCall
          else {
            val fromArgumentCall = knownReturnTypes(x.methodFullName)
            if (fromArgumentCall.nonEmpty) fromArgumentCall
            else symbolTable.get(x).map(t => Seq(t, XTypeRecovery.DummyReturnType).mkString(pathSep))
          }
        case x =>
          symbolTable.get(x)
      }
      .getOrElse(Set.empty[String])
  }

  /** Writes the recovered types from the symbol table back onto the nodes of this file.
    *
    * Method parameters and method returns are only visited during the final iteration. Member updates are collected
    * in `newTypesForMembers` and stored once all other nodes have been handled.
    */
  override protected def setTypeInformation(): Unit = {
    cu.ast
      .collect {
        case n: Local                                       => n
        case n: Call                                        => n
        case n: Expression                                  => n
        case n: MethodParameterIn if state.isFinalIteration => n
        case n: MethodReturn if state.isFinalIteration      => n
      }
      .foreach {
        case x: Local if symbolTable.contains(x) => storeNodeTypeInfo(x, symbolTable.get(x).toSeq)
        case x: MethodParameterIn                => setTypeFromTypeHints(x)
        case x: MethodReturn =>
          setTypeFromTypeHints(x)
        case x: Identifier if symbolTable.contains(x) =>
          setTypeInformationForRecCall(x, x.inCall.headOption, x.inCall.argument.l)
        case x: Call if symbolTable.contains(x) =>
          // Dummy types are only stored in iterations that enable them.
          val typs =
            if (state.enableDummyTypesForThisIteration) symbolTable.get(x).toSeq
            else symbolTable.get(x).filterNot(XTypeRecovery.isDummyType).toSeq
          storeCallTypeInfo(x, typs)
        case x: Identifier if symbolTable.contains(CallAlias(x.name)) && x.inCall.nonEmpty =>
          setTypeInformationForRecCall(x, x.inCall.headOption, x.inCall.argument.l)
        case x: Call
            if x.argument.headOption.isCall.exists(c =>
              c.name.equals("<operator>.indirectFieldAccess") || c.name.equals("<operator>.fieldAccess")
            ) && x.argument.headOption.isCall.argument.headOption.exists(c => symbolTable.contains(c)) =>
          // The call's first argument is a field access on a known base: derive the call's full name from it first.
          setCallMethodFullNameFromBase(x)
          val typs =
            if (state.enableDummyTypesForThisIteration) symbolTable.get(x).toSeq
            else symbolTable.get(x).filterNot(XTypeRecovery.isDummyType).toSeq
          storeCallTypeInfo(x, typs)
        case x: Call if x.argument.headOption.exists(symbolTable.contains) =>
          setTypeInformationForRecCall(x, Option(x), x.argument.l)
        case _ =>
      }
    // Set types in an atomic way
    newTypesForMembers.foreach { case (m, ts) => storeDefaultTypeInfo(m, ts.toSeq) }
  }

  /** Stores `types` on `storedNode` if they add information, i.e. `types` is non-empty and either the node's current
    * `TYPE_FULL_NAME` matches the unknown-type pattern or `types` differs from the node's known types.
    *
    * Operator calls are never updated, and member updates are deferred into `newTypesForMembers`.
    */
  private def storeNodeTypeInfo(storedNode: StoredNode, types: Seq[String]): Unit = {
    lazy val existingTypes = storedNode.getKnownTypes

    val hasUnknownTypeFullName = storedNode
      .property(PropertyNames.TYPE_FULL_NAME, Defines.Any)
      .matches(XTypeRecovery.unknownTypePattern.pattern.pattern())

    if (types.nonEmpty && (hasUnknownTypeFullName || types.toSet != existingTypes)) {
      storedNode match {
        case m: Member =>
          // To avoid overwriting member updates, we store them elsewhere until the end
          newTypesForMembers.updateWith(m) {
            case Some(ts) => Option(ts ++ types)
            case None     => Option(types.toSet)
          }
        case i: Identifier                               => storeIdentifierTypeInfo(i, types)
        case l: Local                                    => storeLocalTypeInfo(l, types)
        case c: Call if !c.name.startsWith("<operator>") => storeCallTypeInfo(c, types)
        case _: Call                                     =>
        case n =>
          setTypes(n, types)
      }
    }
  }

  /** Propagates type information for node `x` based on the call `n` it takes part in and that call's arguments `ms`,
    * then persists the symbol table types of `x` itself unless `x` is an operator call.
    */
  private def setTypeInformationForRecCall(x: AstNode, n: Option[Call], ms: List[AstNode]): Unit = {
    (n, ms) match {
      // Case 1: 'call' is an assignment from some dynamic dispatch call
      case (Some(call: Call), ::(i: Identifier, ::(c: Call, _))) if call.name == Operators.assignment =>
        setTypeForIdentifierAssignedToCall(call, i, c)
      // Case 2: 'call' is an assignment from some other data structure
      case (Some(call: Call), ::(i: Identifier, _)) if call.name == Operators.assignment =>
        setTypeForIdentifierAssignedToDefault(call, i)
      // Case 3: 'i' is the receiver of 'call'
      case (Some(call: Call), ::(i: Identifier, _)) if call.name != Operators.fieldAccess =>
        setTypeForDynamicDispatchCall(call, i)
      // Case 4: 'i' is the receiver for a field access on member 'f'
      case (Some(fieldAccess: Call), ::(i: Identifier, ::(f: FieldIdentifier, _)))
          if fieldAccess.name == Operators.fieldAccess =>
        setTypeForFieldAccess(fieldAccess.asInstanceOf[FieldAccess], i, f)
      case _ =>
    }
    // Handle the node itself
    x match {
      case c: Call if c.name.startsWith("<operator") =>
      case _                                         => persistType(x, symbolTable.get(x))
    }
  }
}
Loading

0 comments on commit af3a8e5

Please sign in to comment.