-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1247 from Privado-Inc/dev
Release PR
- Loading branch information
Showing
69 changed files
with
1,762 additions
and
609 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
package ai.privado.cache | ||
|
||
import sourcecode.FileName | ||
|
||
import scala.collection.mutable | ||
/** Thread-safe accumulator for file-linking metadata collected during a scan:
  *   - which files appear together on the same dataflow path, and
  *   - which files each source file imports.
  *
  * Both maps are mutated under `synchronized` so concurrent passes can record
  * into a single shared instance; read access returns immutable snapshots.
  */
class FileLinkingMetadata {

  // fileOnPath -> all files that co-occur with it on at least one dataflow path
  private val dataflowMap = mutable.HashMap[String, mutable.HashSet[String]]()
  // fileName -> files imported by that file
  private val fileImportMap = mutable.HashMap[String, mutable.HashSet[String]]()

  /** Given dataflow file paths, calculates all permutations and stores them in dataflowMap.
    *
    * For input List(List(a,b,c), List(c,d)) we will store
    *
    * a -> (a,b,c)
    *
    * b -> (a,b,c)
    *
    * c -> (a,b,c,d)
    *
    * d -> (c,d)
    *
    * @param dataflowFiles
    *   list of dataflow paths, each path being the list of files it traverses
    */
  def addToDataflowMap(dataflowFiles: List[List[String]]): Unit = synchronized {

    // Pair every file with the full path it appears on.
    // dataflowFiles => List(List(a,b,c), List(c,d))
    // pairs => List((a, List(a,b,c)), (b, List(a,b,c)), (c, List(a,b,c)), (c, List(c,d)), (d, List(c,d)))
    val pairs = for {
      sublist <- dataflowFiles
      elem    <- sublist
    } yield (elem, sublist)

    // Group by file and merge all co-occurring files into that file's entry.
    pairs.groupBy(_._1).foreach { case (key, valuePairs) =>
      dataflowMap
        .getOrElseUpdate(key, mutable.HashSet[String]())
        .addAll(valuePairs.flatMap(_._2).distinct)
    }
    /* Here we will have dataflowMap as
       a -> (a,b,c)
       b -> (a,b,c)
       c -> (a,b,c,d)
       d -> (c,d)
     */
  }

  /** Get dataflow mapping of files.
    * @return
    *   immutable snapshot of fileOnPath -> co-occurring files
    */
  def getDataflowMap: Map[String, mutable.HashSet[String]] = this.dataflowMap.toMap

  /** Add a mapping of fileName -> importedFile.
    *
    * This corresponds to `importedFile` being imported in `fileName`.
    * @param fileName
    *   the file containing the import statement
    * @param importedFile
    *   the file being imported
    */
  def addToFileImportMap(fileName: String, importedFile: String): Unit = synchronized {
    fileImportMap
      .getOrElseUpdate(fileName, mutable.HashSet[String]())
      .addOne(importedFile)
  }

  /** Get file to importedFiles mapping.
    * @return
    *   immutable snapshot of fileName -> imported files
    */
  def getFileImportMap: Map[String, mutable.HashSet[String]] = this.fileImportMap.toMap
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
126 changes: 115 additions & 11 deletions
126
src/main/scala/ai/privado/entrypoint/MetadataProcessor.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,130 @@ | ||
package ai.privado.entrypoint | ||
|
||
import ai.privado.cache.AppCache | ||
import ai.privado.cache.{ | ||
AppCache, | ||
AuditCache, | ||
DataFlowCache, | ||
DatabaseDetailsCache, | ||
FileLinkingMetadata, | ||
PropertyFilterCache, | ||
S3DatabaseDetailsCache | ||
} | ||
import ai.privado.languageEngine.javascript.processor.JavascriptBaseCPGProcessor | ||
import ai.privado.languageEngine.python.processor.PythonBaseCPGProcessor | ||
import ai.privado.metadata.SystemInfo | ||
import ai.privado.metric.MetricHandler | ||
import ai.privado.model.Constants | ||
import ai.privado.model.Language.UNKNOWN | ||
import ai.privado.model.* | ||
import better.files.File | ||
import io.circe.Json | ||
import io.joern.console.cpgcreation.guessLanguage | ||
import ai.privado.entrypoint.MetadataProcessor.statsRecorder | ||
|
||
import scala.util.{Failure, Success, Try} | ||
|
||
/** Entry point for the `metadata` command.
  *
  * Without `--isDeltaFileScan` it only dumps system metadata to a file. With
  * `--isDeltaFileScan` it additionally detects the repository language and runs
  * the corresponding base-CPG processor to collect file-linking metadata.
  */
object MetadataProcessor extends CommandProcessor with RuleProcessor {

  // Shared caches: created once and passed to every CPG processor so results
  // accumulate in a single place regardless of the language branch taken.
  private val auditCache             = new AuditCache
  private val s3DatabaseDetailsCache = new S3DatabaseDetailsCache
  private val propertyFilterCache    = new PropertyFilterCache()
  private val databaseDetailsCache   = new DatabaseDetailsCache()
  private val fileLinkingMetadata    = new FileLinkingMetadata()

  /** Fresh DataFlowCache bound to the current config and the shared audit cache. */
  def getDataflowCache: DataFlowCache = {
    new DataFlowCache(config, auditCache)
  }

  /** Runs the metadata command: delta scans go through the CPG pipeline,
    * otherwise only system metadata is generated.
    * @param appCache application-level cache initialised per scan
    * @return Left(error message) on failure, Right(()) on success
    */
  override def process(appCache: AppCache): Either[String, Unit] = {
    if (config.isDeltaFileScan) {
      processCpg(appCache)
    } else {
      Try(generateMetadata()) match
        case Failure(exception) =>
          println(s"Exception when processing metadata command : ${exception.toString}")
          Left(exception.toString)
        // Result value is not needed by callers; conform to Either[String, Unit]
        case Success(_) => Right(())
    }
  }

  /** Collects system information and dumps it to a file under the source location.
    * @return the collected SystemInfo
    */
  def generateMetadata(): SystemInfo = {
    val systemInfo = SystemInfo.getInfo
    SystemInfo.dumpInfoToFile(config.sourceLocation.head, Constants.systemInfoFileName, systemInfo)
    systemInfo
  }

  /** Detects the repository language, generates system metadata, and runs the
    * matching base-CPG processor (JavaScript engine is the fallback for
    * unrecognised languages).
    * @param appCache application-level cache initialised here for this scan
    * @return Left(error message) on processor failure, Right(()) otherwise
    */
  private def processCpg(appCache: AppCache): Either[String, Unit] = {
    val sourceRepoLocation = File(config.sourceLocation.head).path.toAbsolutePath.toString.stripSuffix("/")
    val excludeFileRegex   = config.excludeFileRegex
    // Setting up the application cache
    appCache.init(sourceRepoLocation, excludeFileRegex = excludeFileRegex)
    statsRecorder.initiateNewStage("Language detection")
    val languageDetected = if (config.forceLanguage == UNKNOWN) {
      val langDect = Try(guessLanguage(sourceRepoLocation))
      statsRecorder.endLastStage()
      Language.withJoernLangName(langDect)
    } else {
      statsRecorder.justLogMessage("Language forced ...")
      statsRecorder.endLastStage()
      config.forceLanguage
    }
    MetricHandler.metricsData("language") = Json.fromString(languageDetected.toString)

    // System metadata is produced for delta scans as well; failures here are
    // logged but do not abort the CPG run.
    Try(generateMetadata()) match
      case Failure(exception) =>
        println(s"Exception when processing metadata command : ${exception.toString}")
      case Success(_) => ()

    languageDetected match {
      case Language.JAVASCRIPT =>
        statsRecorder.justLogMessage("Detected language 'JavaScript'")
        new JavascriptBaseCPGProcessor(
          getProcessedRule(Set(Language.JAVASCRIPT), appCache, statsRecorder, config),
          this.config,
          sourceRepoLocation,
          dataFlowCache = getDataflowCache,
          auditCache, // shared instances, consistent with the Python branch
          s3DatabaseDetailsCache,
          appCache,
          statsRecorder = statsRecorder,
          databaseDetailsCache = databaseDetailsCache,
          propertyFilterCache = propertyFilterCache,
          fileLinkingMetadata = fileLinkingMetadata
        ).processCpg()
      case Language.PYTHON =>
        statsRecorder.justLogMessage("Detected language 'Python'")
        new PythonBaseCPGProcessor(
          getProcessedRule(Set(Language.PYTHON), appCache, statsRecorder, config),
          this.config,
          sourceRepoLocation,
          dataFlowCache = getDataflowCache,
          auditCache,
          s3DatabaseDetailsCache,
          appCache,
          propertyFilterCache = propertyFilterCache,
          databaseDetailsCache = databaseDetailsCache,
          statsRecorder = statsRecorder,
          fileLinkingMetadata = fileLinkingMetadata
        ).processCpg()
      case _ =>
        println("language not supported yet..")
        statsRecorder.justLogMessage("Language not detected, force scanning using Javascript engine")
        new JavascriptBaseCPGProcessor(
          getProcessedRule(Set(Language.JAVASCRIPT), appCache, statsRecorder, config),
          this.config,
          sourceRepoLocation,
          dataFlowCache = getDataflowCache,
          auditCache,
          s3DatabaseDetailsCache,
          appCache,
          statsRecorder = statsRecorder,
          databaseDetailsCache = databaseDetailsCache,
          propertyFilterCache = propertyFilterCache,
          fileLinkingMetadata = fileLinkingMetadata
        ).processCpg()
    } match {
      case Left(err: String) => Left(err)
      case _ =>
        Right(
          ()
        ) // Ignore the result as not needed for further step, and due to discrepancy in output for New and old frontends
    }
  }

}
Oops, something went wrong.