Merge pull request #1247 from Privado-Inc/dev
Release PR
khemrajrathore authored Aug 14, 2024
2 parents 70631a3 + f39439c commit c6f6f2c
Showing 69 changed files with 1,762 additions and 609 deletions.
74 changes: 74 additions & 0 deletions src/main/scala/ai/privado/cache/FileLinkingMetadata.scala
@@ -0,0 +1,74 @@
package ai.privado.cache

import sourcecode.FileName

import scala.collection.mutable
class FileLinkingMetadata {

private val dataflowMap = mutable.HashMap[String, mutable.HashSet[String]]()
private val fileImportMap = mutable.HashMap[String, mutable.HashSet[String]]()

/** Given dataflow file paths, the function computes, for each file, the union of all dataflow groups in which that file appears, and stores the result in dataflowMap
*
* For input List(List(a,b,c), List(c,d)) we will store
*
* a -> (a,b,c)
*
* b -> (a,b,c)
*
* c -> (a,b,c,d)
*
* d -> (c,d)
*
* @param dataflowFiles one inner list of file paths per dataflow
*/
def addToDataflowMap(dataflowFiles: List[List[String]]): Unit = {

val pairs = for {
sublist <- dataflowFiles
elem <- sublist
} yield (elem, sublist)

/* Explaining the above piece of code
dataflow files => List(List(a,b,c), List(c,d))
pairs => List((a, List(a,b,c)), (b, List(a,b,c)), (c, List(a,b,c)), (c, List(c,d)), (d, List(c,d)))
*/
val grouped = pairs.groupBy(_._1)

grouped.foreach { case (key, valuePairs) =>
if (!dataflowMap.contains(key))
dataflowMap(key) = mutable.HashSet[String]()
dataflowMap(key).addAll(valuePairs.flatMap(_._2).distinct)
}
/*
Here we will have dataflowMap as
a -> (a,b,c)
b -> (a,b,c)
c -> (a,b,c,d)
d -> (c,d)
*/
}

/** Get dataflowMapping of files
* @return
*/
def getDataflowMap: Map[String, mutable.HashSet[String]] = this.dataflowMap.toMap

/** Add `importedFile` to the set of files imported by `fileName`
  *
  * This corresponds to `importedFile` being imported in `fileName`
  * @param fileName
  * @param importedFile
  */
def addToFileImportMap(fileName: String, importedFile: String): Unit = synchronized {

if (!fileImportMap.contains(fileName))
fileImportMap(fileName) = mutable.HashSet[String]()
fileImportMap(fileName).addOne(importedFile)
}

/** Get File to importedFiles mapping
* @return
*/
def getFileImportMap: Map[String, mutable.HashSet[String]] = this.fileImportMap.toMap
}
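
To make the behaviour of the class above concrete, here is a small standalone usage sketch (a hypothetical demo object, not part of this commit); the expected output follows the worked examples in the doc comments:

object FileLinkingMetadataDemo {
  def main(args: Array[String]): Unit = {
    val meta = new FileLinkingMetadata()

    // Two dataflows: one touching files a, b, c and one touching c, d
    meta.addToDataflowMap(List(List("a", "b", "c"), List("c", "d")))
    meta.getDataflowMap.toSeq.sortBy(_._1).foreach { case (file, group) =>
      println(s"$file -> ${group.toList.sorted}")
    }
    // a -> List(a, b, c)
    // b -> List(a, b, c)
    // c -> List(a, b, c, d)   (c occurs in both dataflows, so it maps to their union)
    // d -> List(c, d)

    // The import map accumulates one imported file per call
    meta.addToFileImportMap("Main.scala", "Utils.scala")
    meta.addToFileImportMap("Main.scala", "Config.scala")
    println(meta.getFileImportMap("Main.scala").toList.sorted)
    // List(Config.scala, Utils.scala)
  }
}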
35 changes: 34 additions & 1 deletion src/main/scala/ai/privado/entrypoint/CommandParser.scala
@@ -56,6 +56,7 @@ case class PrivadoInput(
showUnresolvedFunctionsReport: Boolean = false,
generateAuditReport: Boolean = false,
dedSourceReport: Boolean = false,
fileLinkingReport: Boolean = false,
enableAuditSemanticsFilter: Boolean = false,
limitNoSinksForDataflows: Int = -1,
limitArgExpansionDataflows: Int = -1,
@@ -69,7 +70,10 @@
rubyParserTimeout: Long = 120,
excludeFileRegex: String = "",
extensionsForPhp: String = "",
-  isSkipHeaderFileContext: Boolean = false
+  isSkipHeaderFileContext: Boolean = false,

// Metadata flags
isDeltaFileScan: Boolean = false
)

object CommandConstants {
@@ -117,6 +121,8 @@ object CommandConstants {
val GENERATE_AUDIT_REPORT_ABBR = "gar"
val DED_SOURCE_REPORT = "ded-source-report"
val DED_SOURCE_REPORT_ABBR = "dsr"
val FILE_LINKING_REPORT = "file-linking-report"
val FILE_LINKING_REPORT_ABBR = "flr"
val ENABLE_AUDIT_SEMANTIC_FILTER = "enable-audit-semantic"
val ENABLE_AUDIT_SEMANTIC_FILTER_ABBR = "eas"
val LIMIT_NO_SINKS_FOR_DATAFLOWS = "limit-no-sinks-for-dataflows"
@@ -142,6 +148,9 @@
val EXTENSIONS_FOR_PHP_ABBR = "exphp"
val IS_SKIP_HEADER_FILE_CONTEXT = "skip-header-file-context"
val IS_SKIP_HEADER_FILE_CONTEXT_ABBR = "shfc"

// Metadata flags
val IS_DELTA_FILE_SCAN = "delta-file-scan"
}

object CommandParser {
@@ -289,6 +298,11 @@ object CommandParser {
.optional()
.action((_, c) => c.copy(dedSourceReport = true))
.text("Export the ded source report"),
opt[Unit](CommandConstants.FILE_LINKING_REPORT)
.abbr(CommandConstants.FILE_LINKING_REPORT_ABBR)
.optional()
.action((_, c) => c.copy(fileLinkingReport = true))
.text("Export the file linking report"),
opt[Unit](CommandConstants.ENABLE_AUDIT_SEMANTIC_FILTER)
.abbr(CommandConstants.ENABLE_AUDIT_SEMANTIC_FILTER_ABBR)
.optional()
@@ -391,6 +405,25 @@
.required()
.action((x, c) => c.copy(sourceLocation = c.sourceLocation + x))
.text("Source code location"),
opt[String](CommandConstants.INTERNAL_CONFIG)
.abbr(CommandConstants.INTERNAL_CONFIG_ABBR)
.required()
.action((x, c) => c.copy(internalConfigPath = c.internalConfigPath + x))
.text("Internal config and rule files location"),
opt[String](CommandConstants.EXTERNAL_CONFIG)
.abbr(CommandConstants.EXTERNAL_CONFIG_ABBR)
.optional()
.action((x, c) => c.copy(externalConfigPath = c.externalConfigPath + x))
.text("External config and rule files location"),
opt[Unit](CommandConstants.FILE_LINKING_REPORT)
.abbr(CommandConstants.FILE_LINKING_REPORT_ABBR)
.optional()
.action((_, c) => c.copy(fileLinkingReport = true))
.text("Export the file linking report"),
opt[Unit](CommandConstants.IS_DELTA_FILE_SCAN)
.optional()
.action((_, c) => c.copy(isDeltaFileScan = true))
.text("Generate metadata for delta scan"),
checkConfig(c =>
if (c.cmd.isEmpty) failure("")
else success
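For readers unfamiliar with scopt, the flag-registration pattern used above can be reproduced in a self-contained sketch (a hypothetical Conf case class stands in for PrivadoInput; the option names come from CommandConstants):

import scopt.OParser

case class Conf(fileLinkingReport: Boolean = false, deltaFileScan: Boolean = false)

object FlagParsingDemo {
  private val builder = OParser.builder[Conf]
  private val parser = {
    import builder.*
    OParser.sequence(
      programName("demo"),
      opt[Unit]("file-linking-report")
        .abbr("flr") // enables the short form -flr
        .optional()
        .action((_, c) => c.copy(fileLinkingReport = true))
        .text("Export the file linking report"),
      opt[Unit]("delta-file-scan")
        .optional()
        .action((_, c) => c.copy(deltaFileScan = true))
        .text("Generate metadata for delta scan")
    )
  }

  def main(args: Array[String]): Unit =
    println(OParser.parse(parser, Array("--delta-file-scan", "-flr"), Conf()))
    // Some(Conf(true,true))
}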
12 changes: 11 additions & 1 deletion src/main/scala/ai/privado/entrypoint/Main.scala
@@ -55,6 +55,16 @@ object Main extends GeneralMetadataLoggers {
)
)
)
case MetadataProcessor =>
statsRecorder.startRecordingWithGivenFrequency(
Some(
TimeMetricRecordConfig(
basePath = s"${MetadataProcessor.config.sourceLocation.head}/.privado",
threadDumpFreq = MetadataProcessor.config.threadDumpFreq,
threadDumpAvgCPULimit = MetadataProcessor.config.threadDumpAvgCPULimit
)
)
)
case _ =>
}
MetricHandler.timeMetric(processor.process(appCache), "Complete") match {
@@ -80,7 +90,7 @@
// any user-facing non-debug logging to be done internally
logger.debug("Failure from scan process:", e)
logger.debug("Skipping auth flow due to scan failure")
logger.error("Error in scanning, skipping auth flow : " + e.getMessage)
logger.error("Error in scanning, skipping auth flow : ", e.getMessage)
MetricHandler.compileAndSend(appCache)
// NOTE: Removed the finally block as it will not be invoked after exit(1) is called on exception.
// exit(1) is important to indicate scan failure to outer process.
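One note on the logger.error change above: if the underlying logger is SLF4J-backed (an assumption, since the diff does not show the logger's declaration), a trailing argument is only rendered when the format string contains a {} placeholder, so the old and new variants do not produce the same output. A minimal sketch:

import org.slf4j.LoggerFactory

object LoggingDemo {
  private val logger = LoggerFactory.getLogger(getClass)

  def main(args: Array[String]): Unit = {
    val e = new RuntimeException("boom")
    // Concatenation: the message text is always part of the output
    logger.error("Error in scanning, skipping auth flow : " + e.getMessage)
    // Placeholder form: the argument is substituted for {}
    logger.error("Error in scanning, skipping auth flow : {}", e.getMessage)
    // No placeholder: the String argument is dropped from the rendered message
    logger.error("Error in scanning, skipping auth flow : ", e.getMessage)
  }
}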
126 changes: 115 additions & 11 deletions src/main/scala/ai/privado/entrypoint/MetadataProcessor.scala
@@ -1,26 +1,130 @@
package ai.privado.entrypoint

-import ai.privado.cache.AppCache
import ai.privado.cache.{
AppCache,
AuditCache,
DataFlowCache,
DatabaseDetailsCache,
FileLinkingMetadata,
PropertyFilterCache,
S3DatabaseDetailsCache
}
import ai.privado.languageEngine.javascript.processor.JavascriptBaseCPGProcessor
import ai.privado.languageEngine.python.processor.PythonBaseCPGProcessor
import ai.privado.metadata.SystemInfo
import ai.privado.metric.MetricHandler
import ai.privado.model.Constants
import ai.privado.model.Language.UNKNOWN
import ai.privado.model.*
import better.files.File
import io.circe.Json
import io.joern.console.cpgcreation.guessLanguage
import ai.privado.entrypoint.MetadataProcessor.statsRecorder

import scala.util.{Failure, Success, Try}

-object MetadataProcessor extends CommandProcessor {
+object MetadataProcessor extends CommandProcessor with RuleProcessor {

private val auditCache = new AuditCache
private val s3DatabaseDetailsCache = new S3DatabaseDetailsCache
private val propertyFilterCache = new PropertyFilterCache()
private val databaseDetailsCache = new DatabaseDetailsCache()
private val fileLinkingMetadata = new FileLinkingMetadata()

def getDataflowCache: DataFlowCache = {
new DataFlowCache(config, auditCache)
}

override def process(appCache: AppCache): Either[String, Unit] = {

-    def generateMetadata(): SystemInfo = {
-      val systemInfo = SystemInfo.getInfo
-      SystemInfo.dumpInfoToFile(config.sourceLocation.head, Constants.systemInfoFileName, systemInfo)
-      systemInfo
if (config.isDeltaFileScan) {
processCpg(appCache)
} else {
Try(generateMetadata()) match
case Failure(exception) =>
println(s"Exception when processing metadata command : ${exception.toString}")
Left(exception.toString)
case Success(systemInfo) => Right(systemInfo)
}
}

def generateMetadata(): SystemInfo = {
val systemInfo = SystemInfo.getInfo
SystemInfo.dumpInfoToFile(config.sourceLocation.head, Constants.systemInfoFileName, systemInfo)
systemInfo
}

private def processCpg(appCache: AppCache): Either[String, Unit] = {
val sourceRepoLocation = File(config.sourceLocation.head).path.toAbsolutePath.toString.stripSuffix("/")
val excludeFileRegex = config.excludeFileRegex
// Setting up the application cache
appCache.init(sourceRepoLocation, excludeFileRegex = excludeFileRegex)
statsRecorder.initiateNewStage("Language detection")
val languageDetected = if (config.forceLanguage == UNKNOWN) {
val langDect = Try(guessLanguage(sourceRepoLocation))
statsRecorder.endLastStage()
Language.withJoernLangName(langDect)
} else {
statsRecorder.justLogMessage("Language forced ...")
statsRecorder.endLastStage()
config.forceLanguage
}
MetricHandler.metricsData("language") = Json.fromString(languageDetected.toString)

-    Try(generateMetadata()) match
-      case Failure(exception) =>
-        println(s"Exception when processing metadata command : ${exception.toString}")
-        Left(exception.toString)
-      case Success(systemInfo) => Right(systemInfo)
languageDetected match {
case Language.JAVASCRIPT =>
statsRecorder.justLogMessage("Detected language 'JavaScript'")
new JavascriptBaseCPGProcessor(
getProcessedRule(Set(Language.JAVASCRIPT), appCache, statsRecorder, config),
this.config,
sourceRepoLocation,
dataFlowCache = getDataflowCache,
AuditCache(),
S3DatabaseDetailsCache(),
appCache,
statsRecorder = statsRecorder,
databaseDetailsCache = databaseDetailsCache,
propertyFilterCache = propertyFilterCache,
fileLinkingMetadata = fileLinkingMetadata
).processCpg()
case Language.PYTHON =>
statsRecorder.justLogMessage("Detected language 'Python'")
new PythonBaseCPGProcessor(
getProcessedRule(Set(Language.PYTHON), appCache, statsRecorder, config),
this.config,
sourceRepoLocation,
dataFlowCache = getDataflowCache,
auditCache,
s3DatabaseDetailsCache,
appCache,
propertyFilterCache = propertyFilterCache,
databaseDetailsCache = databaseDetailsCache,
statsRecorder = statsRecorder,
fileLinkingMetadata = fileLinkingMetadata
).processCpg()
case _ =>
println("language not supported yet..")
statsRecorder.justLogMessage("Language not detected, force scanning using Javascript engine")
new JavascriptBaseCPGProcessor(
getProcessedRule(Set(Language.JAVASCRIPT), appCache, statsRecorder, config),
this.config,
sourceRepoLocation,
dataFlowCache = getDataflowCache,
AuditCache(),
S3DatabaseDetailsCache(),
appCache,
statsRecorder = statsRecorder,
databaseDetailsCache = databaseDetailsCache,
propertyFilterCache = propertyFilterCache,
fileLinkingMetadata = fileLinkingMetadata
).processCpg()
} match {
case Left(err: String) => Left(err)
case _ =>
Right(
()
        ) // Ignore the result as it is not needed for the further step, and due to a discrepancy in output between the new and old frontends
}
}

}
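
Zooming out, the dispatch introduced in this file reduces to the following shape (a simplified sketch; the real processors, caches, and rule handling are elided):

import scala.util.{Failure, Success, Try}

// Sketch of MetadataProcessor.process after this change (not the committed code)
def process(isDeltaFileScan: Boolean): Either[String, Unit] =
  if (isDeltaFileScan)
    processCpg() // full pass: language detection, base-CPG build, file-linking metadata
  else
    Try(generateMetadata()) match {
      case Failure(exception) => Left(exception.toString)
      case Success(_)         => Right(())
    }

def processCpg(): Either[String, Unit] = Right(()) // stand-in for the JavaScript/Python dispatch above
def generateMetadata(): Unit = () // stand-in: dumps system info into the repo's .privado directory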
(Diff for the remaining changed files not loaded.)