From 5935fbe299354222764ad4185b8d67bd1ffd2d5f Mon Sep 17 00:00:00 2001
From: Ankit Kumar
Date: Mon, 5 Aug 2024 13:30:34 +0530
Subject: [PATCH 1/7] added sink export implementation

---
 .../ai/privado/exporter/ExporterUtility.scala |   7 +-
 .../ai/privado/exporter/JSONExporter.scala    |   6 +-
 .../ai/privado/exporter/SinkExporter.scala    |  25 ++--
 .../exporter/monolith/MonolithExporter.scala  |   3 +-
 .../base/processor/BaseProcessor.scala        |   3 +-
 .../exporter/SinkProcessingExporterTest.scala |  17 +++
 .../java/tagger/JavaS3TaggerTest.scala        | 107 ++++++++----------
 .../python/PythonS3TaggerTest.scala           |  20 ++--
 .../collection/CollectionValidator.scala      |  13 ++-
 9 files changed, 109 insertions(+), 92 deletions(-)
 create mode 100644 src/test/scala/ai/privado/exporter/SinkProcessingExporterTest.scala

diff --git a/src/main/scala/ai/privado/exporter/ExporterUtility.scala b/src/main/scala/ai/privado/exporter/ExporterUtility.scala
index 76949eb17..54558c3ec 100644
--- a/src/main/scala/ai/privado/exporter/ExporterUtility.scala
+++ b/src/main/scala/ai/privado/exporter/ExporterUtility.scala
@@ -378,7 +378,8 @@ object ExporterUtility {
     repoItemTagName: Option[String] = None,
     appCache: AppCache,
     databaseDetailsCache: DatabaseDetailsCache,
-    propertyFilterCache: PropertyFilterCache = PropertyFilterCache()
+    propertyFilterCache: PropertyFilterCache = PropertyFilterCache(),
+    dataflowCache: DataFlowCache
   ): (
     mutable.LinkedHashMap[String, Json],
     List[SourceModel],
@@ -398,7 +399,9 @@ object ExporterUtility {
       repoItemTagName = repoItemTagName,
       s3DatabaseDetailsCache,
       appCache,
-      databaseDetailsCache
+      databaseDetailsCache,
+      dataflowCache,
+      dataflows
     )
     val dataflowExporter = new DataflowExporter(dataflows, taggerCache, databaseDetailsCache)
     val collectionExporter =
diff --git a/src/main/scala/ai/privado/exporter/JSONExporter.scala b/src/main/scala/ai/privado/exporter/JSONExporter.scala
index da6276504..fd673b6df 100644
--- a/src/main/scala/ai/privado/exporter/JSONExporter.scala
+++ b/src/main/scala/ai/privado/exporter/JSONExporter.scala
@@ -91,7 +91,8 @@ object JSONExporter {
     s3DatabaseDetailsCache: S3DatabaseDetailsCache,
     appCache: AppCache,
     propertyFilterCache: PropertyFilterCache,
-    databaseDetailsCache: DatabaseDetailsCache
+    databaseDetailsCache: DatabaseDetailsCache,
+    dataflowCache: DataFlowCache
   ): Either[String, Map[String, Json]] = {
 
     try {
@@ -115,7 +116,8 @@ object JSONExporter {
         s3DatabaseDetailsCache,
         appCache = appCache,
         databaseDetailsCache = databaseDetailsCache,
-        propertyFilterCache = propertyFilterCache
+        propertyFilterCache = propertyFilterCache,
+        dataflowCache = dataflowCache
       )
 
       // Add the privado json path of each monolith repository item
diff --git a/src/main/scala/ai/privado/exporter/SinkExporter.scala b/src/main/scala/ai/privado/exporter/SinkExporter.scala
index 813c812f4..1e3dfa877 100644
--- a/src/main/scala/ai/privado/exporter/SinkExporter.scala
+++ b/src/main/scala/ai/privado/exporter/SinkExporter.scala
@@ -23,7 +23,7 @@
 
 package ai.privado.exporter
 
-import ai.privado.cache.{AppCache, DatabaseDetailsCache, RuleCache, S3DatabaseDetailsCache}
+import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, RuleCache, S3DatabaseDetailsCache}
 import ai.privado.entrypoint.{PrivadoInput, ScanProcessor}
 import ai.privado.languageEngine.default.NodeStarters
 import ai.privado.model.exporter.{SinkModel, SinkProcessingModel}
@@ -36,6 +36,7 @@
 import io.shiftleft.codepropertygraph.generated.nodes.{AstNode, CfgNode, Tag}
 import io.shiftleft.semanticcpg.language.*
 import
overflowdb.traversal.Traversal import org.slf4j.LoggerFactory +import io.joern.dataflowengineoss.language.Path import scala.collection.mutable @@ -46,7 +47,9 @@ class SinkExporter( repoItemTagName: Option[String] = None, s3DatabaseDetailsCache: S3DatabaseDetailsCache, appCache: AppCache, - databaseDetailsCache: DatabaseDetailsCache + databaseDetailsCache: DatabaseDetailsCache, + dataflowCache: DataFlowCache, + dataflows: Map[String, Path] ) { lazy val sinkList: List[AstNode] = getSinkList @@ -81,22 +84,14 @@ class SinkExporter( }) processingMap .map(entrySet => + // list of last node of every dataflow + val dataflowSinkList = dataflowCache.getDataflowAfterDedup + .filter(_.sinkId.equals(entrySet._1)) + .map(pathModel => dataflows(pathModel.pathId).elements.last) SinkProcessingModel( entrySet._1, ExporterUtility - .convertPathElements( - { - if (privadoInput.disableDeDuplication) - entrySet._2.toList - else - entrySet._2.toList - .distinctBy(_.code) - .distinctBy(_.lineNumber) - .distinctBy(Utilities.getFileNameForNode) - }, - appCache = appCache, - ruleCache = ruleCache - ) + .convertPathElements(dataflowSinkList, appCache = appCache, ruleCache = ruleCache) ) ) .toList ++ processingMapDisableDedup diff --git a/src/main/scala/ai/privado/exporter/monolith/MonolithExporter.scala b/src/main/scala/ai/privado/exporter/monolith/MonolithExporter.scala index 2f68bc0f2..484b591f3 100644 --- a/src/main/scala/ai/privado/exporter/monolith/MonolithExporter.scala +++ b/src/main/scala/ai/privado/exporter/monolith/MonolithExporter.scala @@ -122,7 +122,8 @@ object MonolithExporter { s3DatabaseDetailsCache, repoItemTagName = Option(repoItemTagName), appCache = appCache, - databaseDetailsCache + databaseDetailsCache, + dataflowCache = dataFlowCache ) output.addOne( diff --git a/src/main/scala/ai/privado/languageEngine/base/processor/BaseProcessor.scala b/src/main/scala/ai/privado/languageEngine/base/processor/BaseProcessor.scala index 75400040f..f704994d6 100644 --- a/src/main/scala/ai/privado/languageEngine/base/processor/BaseProcessor.scala +++ b/src/main/scala/ai/privado/languageEngine/base/processor/BaseProcessor.scala @@ -201,7 +201,8 @@ abstract class BaseProcessor( s3DatabaseDetailsCache, appCache, propertyFilterCache, - databaseDetailsCache + databaseDetailsCache, + dataFlowCache ) match { case Left(err) => MetricHandler.otherErrorsOrWarnings.addOne(err) diff --git a/src/test/scala/ai/privado/exporter/SinkProcessingExporterTest.scala b/src/test/scala/ai/privado/exporter/SinkProcessingExporterTest.scala new file mode 100644 index 000000000..1aed6920f --- /dev/null +++ b/src/test/scala/ai/privado/exporter/SinkProcessingExporterTest.scala @@ -0,0 +1,17 @@ +package ai.privado.exporter + +import ai.privado.testfixtures.JavaFrontendTestSuite + +class SinkProcessingExporterTest extends JavaFrontendTestSuite { + + "Identifier Test" should { + val cpg = code( + """ + |class main { + | String firstName = "name"; + | System.out.println(firstName); + |} + |""".stripMargin) + } + +} diff --git a/src/test/scala/ai/privado/languageEngine/java/tagger/JavaS3TaggerTest.scala b/src/test/scala/ai/privado/languageEngine/java/tagger/JavaS3TaggerTest.scala index a40e4edeb..38fb78441 100644 --- a/src/test/scala/ai/privado/languageEngine/java/tagger/JavaS3TaggerTest.scala +++ b/src/test/scala/ai/privado/languageEngine/java/tagger/JavaS3TaggerTest.scala @@ -27,72 +27,63 @@ import ai.privado.cache.{AppCache, S3DatabaseDetailsCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.exporter.SinkExporter import 
ai.privado.languageEngine.java.JavaTaggingTestBase +import ai.privado.model.Constants +import ai.privado.model.exporter.SinkModel +import ai.privado.model.exporter.SinkEncoderDecoder.* import ai.privado.tagger.sink.RegularSinkTagger +import ai.privado.testfixtures.JavaFrontendTestSuite import scala.collection.mutable -class JavaS3TaggerTest extends JavaTaggingTestBase { - private val privadoInput = PrivadoInput() +class JavaS3TaggerTest extends JavaFrontendTestSuite { - override def beforeAll(): Unit = { - super.beforeAll() - new RegularSinkTagger(cpg, ruleCache, databaseDetailsCache).createAndApply() - new JavaS3Tagger(cpg, s3DatabaseDetailsCache, databaseDetailsCache).createAndApply() - } + "Java code reading and writing from S3 bucket" should { - override val javaFileContents: String = - """ - |import software.amazon.awssdk.core.sync.RequestBody; - |import software.amazon.awssdk.services.s3.S3Client; - |import software.amazon.awssdk.services.s3.model.PutObjectRequest; - |import software.amazon.awssdk.services.s3.model.GetObjectRequest; - | - |public class AWS { - | public static void main(String[] args) { - | S3Client s3Client = S3Client.builder().build(); - | String objectKey = "your-object-key"; - | String filePath = "path/to/your/file.txt"; - | - | PutObjectRequest request = PutObjectRequest.builder() - | .bucket("my-write-bucket") - | .key(objectKey) - | .build(); - | - | GetObjectRequest request2 = GetObjectRequest.builder() - | .key(objectKey) - | .bucket("my-read-bucket") - | .build(); - | - | ResponseBytes objectBytes = s3Client.getObjectAsBytes(request2); - | byte[] data = objectBytes.asByteArray(); - | - | // Write the data to a local file. - | File myFile = new File(filePath); - | OutputStream os = new FileOutputStream(myFile); - | os.write(data); - | System.out.println("Successfully obtained bytes from an S3 object"); - | os.close(); - | - | s3Client.putObject(request, RequestBody.fromFile(new File(filePath)); - | s3Client.close(); - | } - |} - |""".stripMargin + val cpg = code(""" + |import software.amazon.awssdk.core.sync.RequestBody; + |import software.amazon.awssdk.services.s3.S3Client; + |import software.amazon.awssdk.services.s3.model.PutObjectRequest; + |import software.amazon.awssdk.services.s3.model.GetObjectRequest; + | + |public class AWS { + | public static void main(String[] args) { + | S3Client s3Client = S3Client.builder().build(); + | String objectKey = "your-object-key"; + | String filePath = "path/to/your/file.txt"; + | + | PutObjectRequest request = PutObjectRequest.builder() + | .bucket("my-write-bucket") + | .key(objectKey) + | .build(); + | + | GetObjectRequest request2 = GetObjectRequest.builder() + | .key(objectKey) + | .bucket("my-read-bucket") + | .build(); + | + | ResponseBytes objectBytes = s3Client.getObjectAsBytes(request2); + | byte[] data = objectBytes.asByteArray(); + | + | // Write the data to a local file. 
+ | File myFile = new File(filePath); + | OutputStream os = new FileOutputStream(myFile); + | os.write(data); + | System.out.println("Successfully obtained bytes from an S3 object"); + | os.close(); + | + | s3Client.putObject(request, RequestBody.fromFile(new File(filePath)); + | s3Client.close(); + | } + |} + |""".stripMargin) - "Java code reading and writing from S3 bucket" should { "have bucket name" in { - val sinkExporter = - new SinkExporter( - cpg, - ruleCache, - privadoInput, - None, - s3DatabaseDetailsCache, - appCache = new AppCache(), - databaseDetailsCache = databaseDetailsCache - ) - sinkExporter.getSinks.map(_.databaseDetails.dbName) shouldBe List("my-write-bucket", "my-read-bucket") + val outputMap = cpg.getPrivadoJson() + val sinks = outputMap(Constants.sinks) + .as[List[SinkModel]] + .getOrElse(List()) + + sinks.map(_.databaseDetails.dbName) shouldBe List("my-write-bucket", "my-read-bucket") } } - } diff --git a/src/test/scala/ai/privado/languageEngine/python/PythonS3TaggerTest.scala b/src/test/scala/ai/privado/languageEngine/python/PythonS3TaggerTest.scala index c3bdfe9ca..7f5e6214e 100644 --- a/src/test/scala/ai/privado/languageEngine/python/PythonS3TaggerTest.scala +++ b/src/test/scala/ai/privado/languageEngine/python/PythonS3TaggerTest.scala @@ -27,6 +27,8 @@ import ai.privado.cache.{AppCache, DatabaseDetailsCache, RuleCache, S3DatabaseDe import ai.privado.entrypoint.PrivadoInput import ai.privado.exporter.SinkExporter import ai.privado.model.* +import ai.privado.model.exporter.{SinkModel, SourceProcessingModel} +import ai.privado.model.exporter.SinkEncoderDecoder.* import ai.privado.testfixtures.PythonFrontendTestSuite class PythonS3TaggerTest extends PythonFrontendTestSuite { @@ -105,17 +107,13 @@ class PythonS3TaggerTest extends PythonFrontendTestSuite { |""".stripMargin) "have bucket name" in { - val sinkExporter = - new SinkExporter( - cpg, - ruleCache, - privadoInput, - None, - s3DatabaseDetailsCache, - appCache = appCache, - databaseDetailsCache = DatabaseDetailsCache() - ) - sinkExporter.getSinks.head.databaseDetails.dbName shouldBe "meri-prod-bucket" + val outputMap = cpg.getPrivadoJson() + val sinks = outputMap(Constants.sinks) + .as[List[SinkModel]] + .getOrElse(List()) + + sinks.headOption.get.databaseDetails.dbName shouldBe "meri-prod-bucket" + } } } diff --git a/src/test/scala/ai/privado/tagger/collection/CollectionValidator.scala b/src/test/scala/ai/privado/tagger/collection/CollectionValidator.scala index 55392ac9c..519f40035 100644 --- a/src/test/scala/ai/privado/tagger/collection/CollectionValidator.scala +++ b/src/test/scala/ai/privado/tagger/collection/CollectionValidator.scala @@ -15,7 +15,15 @@ import io.shiftleft.codepropertygraph.generated.nodes.Method import io.shiftleft.semanticcpg.language.* import org.scalatest.matchers.should.Matchers import org.scalatest.Assertion -import ai.privado.cache.{AppCache, DatabaseDetailsCache, PropertyFilterCache, S3DatabaseDetailsCache, TaggerCache} +import ai.privado.cache.{ + AppCache, + AuditCache, + DataFlowCache, + DatabaseDetailsCache, + PropertyFilterCache, + S3DatabaseDetailsCache, + TaggerCache +} import ai.privado.entrypoint.PrivadoInput import ai.privado.model.Constants.outputFileName import ai.privado.rule.RuleInfoTestData @@ -71,7 +79,8 @@ trait CollectionValidator extends Matchers { None, appCache, new DatabaseDetailsCache(), - new PropertyFilterCache() + new PropertyFilterCache(), + new DataFlowCache(privadoInput, new AuditCache()) ) collections.size shouldBe 1 From 
f3fc0f2beefdb3da23f772dc374a7a48692b6221 Mon Sep 17 00:00:00 2001 From: Ankit Kumar Date: Wed, 7 Aug 2024 13:44:49 +0530 Subject: [PATCH 2/7] sink processing dataflow element --- .../ai/privado/exporter/ExporterUtility.scala | 9 +- .../ai/privado/exporter/SinkExporter.scala | 40 ++++--- .../exporter/DataflowExporterValidator.scala | 7 ++ .../exporter/SinkExporterValidator.scala | 16 +++ .../exporter/SinkProcessingExporterTest.scala | 105 +++++++++++++++++- 5 files changed, 151 insertions(+), 26 deletions(-) create mode 100644 src/test/scala/ai/privado/exporter/SinkExporterValidator.scala diff --git a/src/main/scala/ai/privado/exporter/ExporterUtility.scala b/src/main/scala/ai/privado/exporter/ExporterUtility.scala index 54558c3ec..8881c06c7 100644 --- a/src/main/scala/ai/privado/exporter/ExporterUtility.scala +++ b/src/main/scala/ai/privado/exporter/ExporterUtility.scala @@ -399,9 +399,7 @@ object ExporterUtility { repoItemTagName = repoItemTagName, s3DatabaseDetailsCache, appCache, - databaseDetailsCache, - dataflowCache, - dataflows + databaseDetailsCache ) val dataflowExporter = new DataflowExporter(dataflows, taggerCache, databaseDetailsCache) val collectionExporter = @@ -457,9 +455,6 @@ object ExporterUtility { val sinks = Future { Try(sinkExporter.getSinks).getOrElse(List[SinkModel]()) } - val processingSinks = Future { - Try(sinkExporter.getProcessing).getOrElse(List[SinkProcessingModel]()) - } val collections = Future { Try(collectionExporter.getCollections).getOrElse(List[CollectionModel]()) } @@ -585,7 +580,7 @@ object ExporterUtility { logger.debug("Done with exporting Processing sources") val _sinks = Await.result(sinks, Duration.Inf) logger.debug("Done with exporting Sinks") - val _processingSinks = Await.result(processingSinks, Duration.Inf) + val _processingSinks = Try(sinkExporter.getProcessing(dataflowsOutput)).getOrElse(List[SinkProcessingModel]()) logger.debug("Done with exporting Processing Sinks") val _permissions = Await.result(androidPermissions, Duration.Inf) logger.debug("Done with exporting android permissions") diff --git a/src/main/scala/ai/privado/exporter/SinkExporter.scala b/src/main/scala/ai/privado/exporter/SinkExporter.scala index 1e3dfa877..a1a4ca7ae 100644 --- a/src/main/scala/ai/privado/exporter/SinkExporter.scala +++ b/src/main/scala/ai/privado/exporter/SinkExporter.scala @@ -26,7 +26,7 @@ package ai.privado.exporter import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, RuleCache, S3DatabaseDetailsCache} import ai.privado.entrypoint.{PrivadoInput, ScanProcessor} import ai.privado.languageEngine.default.NodeStarters -import ai.privado.model.exporter.{SinkModel, SinkProcessingModel} +import ai.privado.model.exporter.{DataFlowSubCategoryModel, SinkModel, SinkProcessingModel} import ai.privado.model.exporter.DataFlowEncoderDecoder.* import ai.privado.semantic.Language.* import ai.privado.model.{CatLevelOne, Constants, DatabaseDetails, InternalTag, NodeType} @@ -47,9 +47,7 @@ class SinkExporter( repoItemTagName: Option[String] = None, s3DatabaseDetailsCache: S3DatabaseDetailsCache, appCache: AppCache, - databaseDetailsCache: DatabaseDetailsCache, - dataflowCache: DataFlowCache, - dataflows: Map[String, Path] + databaseDetailsCache: DatabaseDetailsCache ) { lazy val sinkList: List[AstNode] = getSinkList @@ -63,7 +61,9 @@ class SinkExporter( convertSinkList(sinkTagList) } - def getProcessing: List[SinkProcessingModel] = { + def getProcessing( + dataflowsOutput: mutable.LinkedHashMap[String, List[DataFlowSubCategoryModel]] + ): 
List[SinkProcessingModel] = { val processingMap = mutable.HashMap[String, mutable.Set[AstNode]]() // special map to store sink processing which should never be deduplicated val processingMapDisableDedup = mutable.HashMap[String, mutable.Set[AstNode]]() @@ -84,15 +84,29 @@ class SinkExporter( }) processingMap .map(entrySet => - // list of last node of every dataflow - val dataflowSinkList = dataflowCache.getDataflowAfterDedup - .filter(_.sinkId.equals(entrySet._1)) - .map(pathModel => dataflows(pathModel.pathId).elements.last) - SinkProcessingModel( - entrySet._1, - ExporterUtility - .convertPathElements(dataflowSinkList, appCache = appCache, ruleCache = ruleCache) + // List of Tagged sinks + val taggedSinkList = ExporterUtility.convertPathElements( + { + if (privadoInput.disableDeDuplication) + entrySet._2.toList + else + entrySet._2.toList + .distinctBy(_.code) + .distinctBy(_.lineNumber) + .distinctBy(Utilities.getFileNameForNode) + }, + appCache = appCache, + ruleCache = ruleCache ) + // list of last node of every dataflow + val dataflowSinkList = dataflowsOutput + .flatMap(_._2) + .flatMap(_.sinks) + .filter(_.id.equals(entrySet._1)) + .flatMap(_.paths) + .map(_.path.last) + val finalProcessingResultList = (taggedSinkList ++ dataflowSinkList).toSet + SinkProcessingModel(entrySet._1, finalProcessingResultList.toList) ) .toList ++ processingMapDisableDedup .map(entrySet => diff --git a/src/test/scala/ai/privado/exporter/DataflowExporterValidator.scala b/src/test/scala/ai/privado/exporter/DataflowExporterValidator.scala index 44c64eefc..64aaafc34 100644 --- a/src/test/scala/ai/privado/exporter/DataflowExporterValidator.scala +++ b/src/test/scala/ai/privado/exporter/DataflowExporterValidator.scala @@ -18,4 +18,11 @@ trait DataflowExporterValidator { alldataflows(Constants.leakages) } + def getStorageFlows(outputMap: Map[String, Json]): List[DataFlowSubCategoryModel] = { + val allDataflows = outputMap(Constants.dataFlow) + .as[mutable.LinkedHashMap[String, List[DataFlowSubCategoryModel]]] + .getOrElse(Map.empty[String, List[DataFlowSubCategoryModel]]) + + allDataflows(Constants.storages) + } } diff --git a/src/test/scala/ai/privado/exporter/SinkExporterValidator.scala b/src/test/scala/ai/privado/exporter/SinkExporterValidator.scala new file mode 100644 index 000000000..2b7c5f6db --- /dev/null +++ b/src/test/scala/ai/privado/exporter/SinkExporterValidator.scala @@ -0,0 +1,16 @@ +package ai.privado.exporter + +import ai.privado.model.Constants +import ai.privado.model.exporter.SinkProcessingModel +import ai.privado.model.exporter.SinkEncoderDecoder.* +import io.circe.Json + +trait SinkExporterValidator { + + def getSinkProcessings(outputMap: Map[String, Json]): List[SinkProcessingModel] = { + val processings = outputMap(Constants.sinkProcessing) + .as[List[SinkProcessingModel]] + .getOrElse(List()) + processings + } +} diff --git a/src/test/scala/ai/privado/exporter/SinkProcessingExporterTest.scala b/src/test/scala/ai/privado/exporter/SinkProcessingExporterTest.scala index 1aed6920f..ff5032289 100644 --- a/src/test/scala/ai/privado/exporter/SinkProcessingExporterTest.scala +++ b/src/test/scala/ai/privado/exporter/SinkProcessingExporterTest.scala @@ -1,17 +1,110 @@ package ai.privado.exporter +import ai.privado.model.{CatLevelOne, Constants, FilterProperty, Language, NodeType, RuleInfo} +import ai.privado.model.exporter.SinkProcessingModel +import ai.privado.exporter.DataflowExporterValidator import ai.privado.testfixtures.JavaFrontendTestSuite +import ai.privado.rule.{RuleInfoTestData, 
SinkRuleTestData} +import ai.privado.cache.RuleCache +import io.shiftleft.semanticcpg.language.* -class SinkProcessingExporterTest extends JavaFrontendTestSuite { +class SinkProcessingExporterTest + extends JavaFrontendTestSuite + with DataflowExporterValidator + with SinkExporterValidator { - "Identifier Test" should { + val sinkRule = List( + RuleInfo( + "Storages.Local", + "Local Storage", + "", + FilterProperty.METHOD_FULL_NAME, + Array("local.com"), + List("(?i).*(localStorage).*(save|find).*"), + false, + "", + Map(), + NodeType.REGULAR, + "", + CatLevelOne.SINKS, + catLevelTwo = Constants.storages, + Language.JAVA, + Array() + ) + ) + + val ruleCache = RuleCache().setRule( + RuleInfoTestData.rule + .copy(sources = RuleInfoTestData.sourceRule, sinks = sinkRule) + ) + + "Simple sink processing dataflow Test" should { val cpg = code( """ - |class main { - | String firstName = "name"; - | System.out.println(firstName); + |import Dummy.LocalStorage; + | + |class Main { + | public void printValues() { + | String firstName = "first"; + | + | LocalStorage localStorage = new LocalStorage(); + | + | localStorage.save(firstName); + | } |} - |""".stripMargin) + |""".stripMargin, + "index.java" + ) + .withRuleCache(ruleCache) + + "Sink processing should have correct node" in { + val outputJson = cpg.getPrivadoJson() + val processingList = getSinkProcessings(outputJson).flatMap(_.occurrences) + val storageDataflow = getStorageFlows(outputJson) + val dataflowSinkElements = storageDataflow.flatMap(_.sinks).flatMap(_.paths).map(_.path.lastOption.get) + + dataflowSinkElements.foreach(lastElement => { + processingList.exists(element => { + element.sample.equals(lastElement.sample) && element.lineNumber.equals(lastElement.lineNumber) + }) shouldBe true + }) + } } + "Sink processing test when two sinks tagged with same rule" should { + val cpg = code( + """ + |import Dummy.LocalStorage; + | + |class Main { + | public void printValues() { + | String firstName = "name"; + | String lastName = "last"; + | + | LocalStorage localStorage = new LocalStorage(); + | + | localStorage.save(firstName); + | localStorage.find(lastName); + | } + |} + |""".stripMargin, + "index.java" + ) + .withRuleCache(ruleCache) + + "Sink processing should have correct node" in { + val outputJson = cpg.getPrivadoJson() + val processingList = getSinkProcessings(outputJson).flatMap(_.occurrences) + val storageDataflow = getStorageFlows(outputJson) + val dataflowSinkElements = storageDataflow.flatMap(_.sinks).flatMap(_.paths).map(_.path.lastOption.get) + + processingList.size shouldBe 2 + + dataflowSinkElements.foreach(lastElement => { + processingList.exists(element => { + element.sample.equals(lastElement.sample) && element.lineNumber.equals(lastElement.lineNumber) + }) shouldBe true + }) + } + } } From a959d0bc0202fbb7f4522213355e90d8f8907a53 Mon Sep 17 00:00:00 2001 From: Ankit Kumar Date: Wed, 7 Aug 2024 13:48:33 +0530 Subject: [PATCH 3/7] code refactoring --- src/main/scala/ai/privado/exporter/ExporterUtility.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/scala/ai/privado/exporter/ExporterUtility.scala b/src/main/scala/ai/privado/exporter/ExporterUtility.scala index 236825f74..fd4fd7f4b 100644 --- a/src/main/scala/ai/privado/exporter/ExporterUtility.scala +++ b/src/main/scala/ai/privado/exporter/ExporterUtility.scala @@ -390,6 +390,7 @@ object ExporterUtility { Int ) = { logger.info("Initiated exporter engine") + val sourceExporter = new SourceExporter(cpg, ruleCache, privadoInput, 
repoItemTagName = repoItemTagName, appCache) val sinkExporter = new SinkExporter( cpg, @@ -460,8 +461,6 @@ object ExporterUtility { ) }) - val sourceExporter = new SourceExporter(cpg, ruleCache, privadoInput, repoItemTagName = repoItemTagName, appCache) - // Future creates a thread and starts resolving the function call asynchronously val sources = Future { Try(sourceExporter.getSources).getOrElse(List[SourceModel]()) From f0b83e604bf43f5b5fbc68e4f0f67ce486f92288 Mon Sep 17 00:00:00 2001 From: Ankit Kumar Date: Wed, 7 Aug 2024 13:55:05 +0530 Subject: [PATCH 4/7] code refactoring --- .../scala/ai/privado/exporter/ExporterUtility.scala | 3 +-- .../scala/ai/privado/exporter/JSONExporter.scala | 6 ++---- .../exporter/monolith/MonolithExporter.scala | 3 +-- .../base/processor/BaseProcessor.scala | 3 +-- .../tagger/collection/CollectionValidator.scala | 13 ++----------- 5 files changed, 7 insertions(+), 21 deletions(-) diff --git a/src/main/scala/ai/privado/exporter/ExporterUtility.scala b/src/main/scala/ai/privado/exporter/ExporterUtility.scala index fd4fd7f4b..6e3ee9d49 100644 --- a/src/main/scala/ai/privado/exporter/ExporterUtility.scala +++ b/src/main/scala/ai/privado/exporter/ExporterUtility.scala @@ -378,8 +378,7 @@ object ExporterUtility { repoItemTagName: Option[String] = None, appCache: AppCache, databaseDetailsCache: DatabaseDetailsCache, - propertyFilterCache: PropertyFilterCache = PropertyFilterCache(), - dataflowCache: DataFlowCache + propertyFilterCache: PropertyFilterCache = PropertyFilterCache() ): ( mutable.LinkedHashMap[String, Json], List[SourceModel], diff --git a/src/main/scala/ai/privado/exporter/JSONExporter.scala b/src/main/scala/ai/privado/exporter/JSONExporter.scala index fd673b6df..da6276504 100644 --- a/src/main/scala/ai/privado/exporter/JSONExporter.scala +++ b/src/main/scala/ai/privado/exporter/JSONExporter.scala @@ -91,8 +91,7 @@ object JSONExporter { s3DatabaseDetailsCache: S3DatabaseDetailsCache, appCache: AppCache, propertyFilterCache: PropertyFilterCache, - databaseDetailsCache: DatabaseDetailsCache, - dataflowCache: DataFlowCache + databaseDetailsCache: DatabaseDetailsCache ): Either[String, Map[String, Json]] = { try { @@ -116,8 +115,7 @@ object JSONExporter { s3DatabaseDetailsCache, appCache = appCache, databaseDetailsCache = databaseDetailsCache, - propertyFilterCache = propertyFilterCache, - dataflowCache = dataflowCache + propertyFilterCache = propertyFilterCache ) // Add the privado json path of each monolith repository item diff --git a/src/main/scala/ai/privado/exporter/monolith/MonolithExporter.scala b/src/main/scala/ai/privado/exporter/monolith/MonolithExporter.scala index 484b591f3..2f68bc0f2 100644 --- a/src/main/scala/ai/privado/exporter/monolith/MonolithExporter.scala +++ b/src/main/scala/ai/privado/exporter/monolith/MonolithExporter.scala @@ -122,8 +122,7 @@ object MonolithExporter { s3DatabaseDetailsCache, repoItemTagName = Option(repoItemTagName), appCache = appCache, - databaseDetailsCache, - dataflowCache = dataFlowCache + databaseDetailsCache ) output.addOne( diff --git a/src/main/scala/ai/privado/languageEngine/base/processor/BaseProcessor.scala b/src/main/scala/ai/privado/languageEngine/base/processor/BaseProcessor.scala index f704994d6..75400040f 100644 --- a/src/main/scala/ai/privado/languageEngine/base/processor/BaseProcessor.scala +++ b/src/main/scala/ai/privado/languageEngine/base/processor/BaseProcessor.scala @@ -201,8 +201,7 @@ abstract class BaseProcessor( s3DatabaseDetailsCache, appCache, propertyFilterCache, - 
databaseDetailsCache, - dataFlowCache + databaseDetailsCache ) match { case Left(err) => MetricHandler.otherErrorsOrWarnings.addOne(err) diff --git a/src/test/scala/ai/privado/tagger/collection/CollectionValidator.scala b/src/test/scala/ai/privado/tagger/collection/CollectionValidator.scala index 519f40035..55392ac9c 100644 --- a/src/test/scala/ai/privado/tagger/collection/CollectionValidator.scala +++ b/src/test/scala/ai/privado/tagger/collection/CollectionValidator.scala @@ -15,15 +15,7 @@ import io.shiftleft.codepropertygraph.generated.nodes.Method import io.shiftleft.semanticcpg.language.* import org.scalatest.matchers.should.Matchers import org.scalatest.Assertion -import ai.privado.cache.{ - AppCache, - AuditCache, - DataFlowCache, - DatabaseDetailsCache, - PropertyFilterCache, - S3DatabaseDetailsCache, - TaggerCache -} +import ai.privado.cache.{AppCache, DatabaseDetailsCache, PropertyFilterCache, S3DatabaseDetailsCache, TaggerCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.model.Constants.outputFileName import ai.privado.rule.RuleInfoTestData @@ -79,8 +71,7 @@ trait CollectionValidator extends Matchers { None, appCache, new DatabaseDetailsCache(), - new PropertyFilterCache(), - new DataFlowCache(privadoInput, new AuditCache()) + new PropertyFilterCache() ) collections.size shouldBe 1 From f2f14371c6f47616c2c38be452ec5b6172880d0a Mon Sep 17 00:00:00 2001 From: Ankit Kumar Date: Wed, 7 Aug 2024 14:14:10 +0530 Subject: [PATCH 5/7] Java S3 test fix --- .../java/tagger/JavaS3TaggerTest.scala | 47 ++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/src/test/scala/ai/privado/languageEngine/java/tagger/JavaS3TaggerTest.scala b/src/test/scala/ai/privado/languageEngine/java/tagger/JavaS3TaggerTest.scala index 38fb78441..0ac43b10e 100644 --- a/src/test/scala/ai/privado/languageEngine/java/tagger/JavaS3TaggerTest.scala +++ b/src/test/scala/ai/privado/languageEngine/java/tagger/JavaS3TaggerTest.scala @@ -27,18 +27,62 @@ import ai.privado.cache.{AppCache, S3DatabaseDetailsCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.exporter.SinkExporter import ai.privado.languageEngine.java.JavaTaggingTestBase -import ai.privado.model.Constants +import ai.privado.model.{CatLevelOne, Constants, FilterProperty, Language, NodeType, RuleInfo} import ai.privado.model.exporter.SinkModel import ai.privado.model.exporter.SinkEncoderDecoder.* import ai.privado.tagger.sink.RegularSinkTagger import ai.privado.testfixtures.JavaFrontendTestSuite +import ai.privado.cache.RuleCache +import ai.privado.rule.RuleInfoTestData import scala.collection.mutable class JavaS3TaggerTest extends JavaFrontendTestSuite { + private val sinkRule = List( + RuleInfo( + "Storages.AmazonS3.Read", + "Amazon S3", + "Storage", + FilterProperty.METHOD_FULL_NAME, + Array(), + List(".*GetObjectRequest.*"), + false, + "", + Map(), + NodeType.REGULAR, + "", + CatLevelOne.SINKS, + "", + Language.JAVA, + Array() + ), + RuleInfo( + "Storages.AmazonS3.Write", + "Amazon S3", + "Storage", + FilterProperty.METHOD_FULL_NAME, + Array(), + List(".*PutObjectRequest.*"), + false, + "", + Map(), + NodeType.REGULAR, + "", + CatLevelOne.SINKS, + "", + Language.JAVA, + Array() + ) + ) + "Java code reading and writing from S3 bucket" should { + val ruleCache = RuleCache().setRule( + RuleInfoTestData.rule + .copy(sources = RuleInfoTestData.sourceRule, sinks = sinkRule) + ) + val cpg = code(""" |import software.amazon.awssdk.core.sync.RequestBody; |import software.amazon.awssdk.services.s3.S3Client; @@ 
-76,6 +120,7 @@ class JavaS3TaggerTest extends JavaFrontendTestSuite { | } |} |""".stripMargin) + .withRuleCache(ruleCache) "have bucket name" in { val outputMap = cpg.getPrivadoJson() From da2c1c2d2450023324e8370f1c84cf742085422e Mon Sep 17 00:00:00 2001 From: Ankit Kumar Date: Mon, 12 Aug 2024 01:22:18 +0530 Subject: [PATCH 6/7] added sink exporter --- .../scala/ai/privado/exporter/SinkExporterValidator.scala | 8 ++++++++ .../languageEngine/java/tagger/JavaS3TaggerTest.scala | 8 +++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/test/scala/ai/privado/exporter/SinkExporterValidator.scala b/src/test/scala/ai/privado/exporter/SinkExporterValidator.scala index 2b7c5f6db..1062e2655 100644 --- a/src/test/scala/ai/privado/exporter/SinkExporterValidator.scala +++ b/src/test/scala/ai/privado/exporter/SinkExporterValidator.scala @@ -3,6 +3,7 @@ package ai.privado.exporter import ai.privado.model.Constants import ai.privado.model.exporter.SinkProcessingModel import ai.privado.model.exporter.SinkEncoderDecoder.* +import ai.privado.model.exporter.SinkModel import io.circe.Json trait SinkExporterValidator { @@ -13,4 +14,11 @@ trait SinkExporterValidator { .getOrElse(List()) processings } + + def getSinks(outputMap: Map[String, Json]): List[SinkModel] = { + val sinks = outputMap(Constants.sinks) + .as[List[SinkModel]] + .getOrElse(List()) + sinks + } } diff --git a/src/test/scala/ai/privado/languageEngine/java/tagger/JavaS3TaggerTest.scala b/src/test/scala/ai/privado/languageEngine/java/tagger/JavaS3TaggerTest.scala index 0ac43b10e..d844e47cc 100644 --- a/src/test/scala/ai/privado/languageEngine/java/tagger/JavaS3TaggerTest.scala +++ b/src/test/scala/ai/privado/languageEngine/java/tagger/JavaS3TaggerTest.scala @@ -25,7 +25,7 @@ package ai.privado.languageEngine.java.tagger import ai.privado.cache.{AppCache, S3DatabaseDetailsCache} import ai.privado.entrypoint.PrivadoInput -import ai.privado.exporter.SinkExporter +import ai.privado.exporter.{SinkExporter, SinkExporterValidator} import ai.privado.languageEngine.java.JavaTaggingTestBase import ai.privado.model.{CatLevelOne, Constants, FilterProperty, Language, NodeType, RuleInfo} import ai.privado.model.exporter.SinkModel @@ -37,7 +37,7 @@ import ai.privado.rule.RuleInfoTestData import scala.collection.mutable -class JavaS3TaggerTest extends JavaFrontendTestSuite { +class JavaS3TaggerTest extends JavaFrontendTestSuite with SinkExporterValidator { private val sinkRule = List( RuleInfo( @@ -124,9 +124,7 @@ class JavaS3TaggerTest extends JavaFrontendTestSuite { "have bucket name" in { val outputMap = cpg.getPrivadoJson() - val sinks = outputMap(Constants.sinks) - .as[List[SinkModel]] - .getOrElse(List()) + val sinks = getSinks(outputMap) sinks.map(_.databaseDetails.dbName) shouldBe List("my-write-bucket", "my-read-bucket") } From 7e1ae5a56ad780a032e955bb1c77a7d9425bf65d Mon Sep 17 00:00:00 2001 From: KhemrajSingh Rathore Date: Tue, 13 Aug 2024 19:46:43 +0530 Subject: [PATCH 7/7] dataflowDependency dump (#1242) * dataflowDependency dump * add - property dependency * fix testcases * fix - file path stored in file node should be relative (#1243) * add namespace to metadata * introduce more flags * add more relative imports in js * add extension * minor refactor --- .../privado/cache/FileLinkingMetadata.scala | 74 ++++ .../ai/privado/entrypoint/CommandParser.scala | 35 +- .../scala/ai/privado/entrypoint/Main.scala | 12 +- .../entrypoint/MetadataProcessor.scala | 126 +++++- .../ai/privado/entrypoint/RuleProcessor.scala | 361 
++++++++++++++++ .../ai/privado/entrypoint/ScanProcessor.scala | 391 ++---------------- .../ai/privado/exporter/JSONExporter.scala | 54 ++- .../base/processor/BaseProcessor.scala | 29 +- .../c/processor/CProcessor.scala | 18 +- .../c/tagger/PrivadoTagger.scala | 5 +- .../csharp/processor/CSharpProcessor.scala | 17 +- .../csharp/tagger/PrivadoTagger.scala | 7 +- .../tagger/sink/CSharpAPISinkTagger.scala | 9 +- .../csharp/tagger/sink/CSharpAPITagger.scala | 11 +- .../default/processor/DefaultProcessor.scala | 17 +- .../default/tagger/PrivadoTagger.scala | 5 +- .../go/processor/GoProcessor.scala | 17 +- .../go/tagger/PrivadoTagger.scala | 9 +- .../go/tagger/sink/GoAPISinkTagger.scala | 9 +- .../go/tagger/sink/GoAPITagger.scala | 11 +- .../java/processor/JavaProcessor.scala | 9 +- .../java/tagger/PrivadoTagger.scala | 7 +- .../tagger/sink/api/JavaAPISinkTagger.scala | 9 +- .../java/tagger/sink/api/JavaAPITagger.scala | 15 +- .../sink/framework/flink/FlinkTagger.scala | 5 +- .../metadata/FileImportMappingPassJS.scala | 66 +++ .../JavascriptBaseCPGProcessor.scala | 117 ++++++ .../processor/JavascriptProcessor.scala | 21 +- .../javascript/tagger/PrivadoTagger.scala | 7 +- .../tagger/sink/JSAPISinkTagger.scala | 10 +- .../javascript/tagger/sink/JSAPITagger.scala | 17 +- .../kotlin/processor/KotlinProcessor.scala | 10 +- .../kotlin/tagger/PrivadoTagger.scala | 9 +- .../php/processor/PhpProcessor.scala | 17 +- .../php/tagger/PrivadoTagger.scala | 7 +- .../php/tagger/sink/APITagger.scala | 14 +- .../php/tagger/sink/PhpAPISinkTagger.scala | 10 +- .../FileLinkingMetadataPassPython.scala | 105 +++++ .../processor/PythonBaseCPGProcessor.scala | 118 ++++++ .../python/processor/PythonProcessor.scala | 16 +- .../python/tagger/PrivadoTagger.scala | 15 +- .../tagger/sink/PythonAPISinkTagger.scala | 10 +- .../python/tagger/sink/PythonAPITagger.scala | 8 +- .../ruby/processor/RubyProcessor.scala | 17 +- .../ruby/tagger/PrivadoTagger.scala | 7 +- .../ruby/tagger/sink/APITagger.scala | 8 +- .../ruby/tagger/sink/RubyAPISinkTagger.scala | 10 +- .../scala/ai/privado/model/Constants.scala | 8 + .../ai/privado/tagger/PrivadoBaseTagger.scala | 16 +- src/main/scala/ai/privado/tagger/Tagger.scala | 5 +- .../ai/privado/tagger/sink/APITagger.scala | 14 +- .../tagger/sink/api/APISinkTagger.scala | 5 +- .../tagger/utility/APITaggerUtility.scala | 7 +- .../languageEngine/go/GoTestBase.scala | 3 +- .../passes/config/GoYamlLinkerPassTest.scala | 5 +- .../config/JavaYamlLinkerPassTest.scala | 11 +- ...avaAPISinkByMethodFullNameTaggerTest.scala | 5 +- .../JavaAPISinkByParameterTaggerTest.scala | 14 +- .../javascript/audit/APIReportTest.scala | 4 +- .../javascript/audit/HTTPReportTest.scala | 4 +- .../tagger/sink/JSAPITaggerTest.scala | 8 +- .../ai/privado/policyEngine/PolicyTests.scala | 13 +- 62 files changed, 1489 insertions(+), 514 deletions(-) create mode 100644 src/main/scala/ai/privado/cache/FileLinkingMetadata.scala create mode 100644 src/main/scala/ai/privado/entrypoint/RuleProcessor.scala create mode 100644 src/main/scala/ai/privado/languageEngine/javascript/metadata/FileImportMappingPassJS.scala create mode 100644 src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptBaseCPGProcessor.scala create mode 100644 src/main/scala/ai/privado/languageEngine/python/metadata/FileLinkingMetadataPassPython.scala create mode 100644 src/main/scala/ai/privado/languageEngine/python/processor/PythonBaseCPGProcessor.scala diff --git a/src/main/scala/ai/privado/cache/FileLinkingMetadata.scala 
b/src/main/scala/ai/privado/cache/FileLinkingMetadata.scala
new file mode 100644
index 000000000..7f4e4b40a
--- /dev/null
+++ b/src/main/scala/ai/privado/cache/FileLinkingMetadata.scala
@@ -0,0 +1,74 @@
+package ai.privado.cache
+
+import sourcecode.FileName
+
+import scala.collection.mutable
+class FileLinkingMetadata {
+
+  private val dataflowMap   = mutable.HashMap[String, mutable.HashSet[String]]()
+  private val fileImportMap = mutable.HashMap[String, mutable.HashSet[String]]()
+
+  /** Given dataflow file paths, the function calculates all permutations and stores them in dataflowMap
+    *
+    * For input List(List(a,b,c), List(c,d)) we will store
+    *
+    * a -> (a,b,c)
+    *
+    * b -> (a,b,c)
+    *
+    * c -> (a,b,c,d)
+    *
+    * d -> (c,d)
+    *
+    * @param dataflowFiles
+    */
+  def addToDataflowMap(dataflowFiles: List[List[String]]): Unit = {
+
+    val pairs = for {
+      sublist <- dataflowFiles
+      elem    <- sublist
+    } yield (elem, sublist)
+
+    /* Explaining the above piece of code
+       dataflow files => List(List(a,b,c), List(c,d))
+       pairs => List((a, List(a,b,c)), (b, List(a,b,c)), (c, List(a,b,c)), (c, List(c,d)), (d, List(c,d)))
+     */
+    val grouped = pairs.groupBy(_._1)
+
+    grouped.foreach { case (key, valuePairs) =>
+      if (!dataflowMap.contains(key))
+        dataflowMap(key) = mutable.HashSet[String]()
+      dataflowMap(key).addAll(valuePairs.flatMap(_._2).distinct)
+    }
+    /*
+       Here we will have dataflowMap as
+       a -> (a,b,c)
+       b -> (a,b,c)
+       c -> (a,b,c,d)
+       d -> (c,d)
+     */
+  }
+
+  /** Get dataflow mapping of files
+    * @return
+    */
+  def getDataflowMap: Map[String, mutable.HashSet[String]] = this.dataflowMap.toMap
+
+  /** Add a mapping of fileName -> List(importedFile1, importedFile2)
+    *
+    * This basically corresponds to `importedFile1`, `importedFile2` being imported in `fileName`
+    * @param fileName
+    * @param importedFile
+    */
+  def addToFileImportMap(fileName: String, importedFile: String): Unit = synchronized {
+
+    if (!fileImportMap.contains(fileName))
+      fileImportMap(fileName) = mutable.HashSet[String]()
+    fileImportMap(fileName).addOne(importedFile)
+  }
+
+  /** Get file to importedFiles mapping
+    * @return
+    */
+  def getFileImportMap: Map[String, mutable.HashSet[String]] = this.fileImportMap.toMap
+}
diff --git a/src/main/scala/ai/privado/entrypoint/CommandParser.scala b/src/main/scala/ai/privado/entrypoint/CommandParser.scala
index 7c1847dae..bf65ba119 100644
--- a/src/main/scala/ai/privado/entrypoint/CommandParser.scala
+++ b/src/main/scala/ai/privado/entrypoint/CommandParser.scala
@@ -56,6 +56,7 @@ case class PrivadoInput(
   showUnresolvedFunctionsReport: Boolean = false,
   generateAuditReport: Boolean = false,
   dedSourceReport: Boolean = false,
+  fileLinkingReport: Boolean = false,
   enableAuditSemanticsFilter: Boolean = false,
   limitNoSinksForDataflows: Int = -1,
   limitArgExpansionDataflows: Int = -1,
@@ -69,7 +70,10 @@ case class PrivadoInput(
   rubyParserTimeout: Long = 120,
   excludeFileRegex: String = "",
   extensionsForPhp: String = "",
-  isSkipHeaderFileContext: Boolean = false
+  isSkipHeaderFileContext: Boolean = false,
+
+  // Metadata flags
+  isDeltaFileScan: Boolean = false
 )
 
 object CommandConstants {
@@ -117,6 +121,8 @@ object CommandConstants {
   val GENERATE_AUDIT_REPORT_ABBR = "gar"
   val DED_SOURCE_REPORT = "ded-source-report"
   val DED_SOURCE_REPORT_ABBR = "dsr"
+  val FILE_LINKING_REPORT = "file-linking-report"
+  val FILE_LINKING_REPORT_ABBR = "flr"
   val ENABLE_AUDIT_SEMANTIC_FILTER = "enable-audit-semantic"
   val ENABLE_AUDIT_SEMANTIC_FILTER_ABBR = "eas"
   val LIMIT_NO_SINKS_FOR_DATAFLOWS = "limit-no-sinks-for-dataflows"
@@ -142,6 +148,9 @@ object CommandConstants {
   val EXTENSIONS_FOR_PHP_ABBR = "exphp"
   val IS_SKIP_HEADER_FILE_CONTEXT = "skip-header-file-context"
   val IS_SKIP_HEADER_FILE_CONTEXT_ABBR = "shfc"
+
+  // Metadata flags
+  val IS_DELTA_FILE_SCAN = "delta-file-scan"
 }
 
 object CommandParser {
@@ -289,6 +298,11 @@ object CommandParser {
         .optional()
         .action((_, c) => c.copy(dedSourceReport = true))
         .text("Export the ded source report"),
+      opt[Unit](CommandConstants.FILE_LINKING_REPORT)
+        .abbr(CommandConstants.FILE_LINKING_REPORT_ABBR)
+        .optional()
+        .action((_, c) => c.copy(fileLinkingReport = true))
+        .text("Export the file linking report"),
       opt[Unit](CommandConstants.ENABLE_AUDIT_SEMANTIC_FILTER)
         .abbr(CommandConstants.ENABLE_AUDIT_SEMANTIC_FILTER_ABBR)
         .optional()
@@ -391,6 +405,25 @@
         .required()
         .action((x, c) => c.copy(sourceLocation = c.sourceLocation + x))
         .text("Source code location"),
+      opt[String](CommandConstants.INTERNAL_CONFIG)
+        .abbr(CommandConstants.INTERNAL_CONFIG_ABBR)
+        .required()
+        .action((x, c) => c.copy(internalConfigPath = c.internalConfigPath + x))
+        .text("Internal config and rule files location"),
+      opt[String](CommandConstants.EXTERNAL_CONFIG)
+        .abbr(CommandConstants.EXTERNAL_CONFIG_ABBR)
+        .optional()
+        .action((x, c) => c.copy(externalConfigPath = c.externalConfigPath + x))
+        .text("External config and rule files location"),
+      opt[Unit](CommandConstants.FILE_LINKING_REPORT)
+        .abbr(CommandConstants.FILE_LINKING_REPORT_ABBR)
+        .optional()
+        .action((_, c) => c.copy(fileLinkingReport = true))
+        .text("Export the file linking report"),
+      opt[Unit](CommandConstants.IS_DELTA_FILE_SCAN)
+        .optional()
+        .action((_, c) => c.copy(isDeltaFileScan = true))
+        .text("Generate metadata for delta scan"),
       checkConfig(c =>
         if (c.cmd.isEmpty) failure("")
         else success
diff --git a/src/main/scala/ai/privado/entrypoint/Main.scala b/src/main/scala/ai/privado/entrypoint/Main.scala
index 113e1e997..911c135bc 100644
--- a/src/main/scala/ai/privado/entrypoint/Main.scala
+++ b/src/main/scala/ai/privado/entrypoint/Main.scala
@@ -55,6 +55,16 @@ object Main extends GeneralMetadataLoggers {
             )
           )
         )
+      case MetadataProcessor =>
+        statsRecorder.startRecordingWithGivenFrequency(
+          Some(
+            TimeMetricRecordConfig(
+              basePath = s"${MetadataProcessor.config.sourceLocation.head}/.privado",
+              threadDumpFreq = MetadataProcessor.config.threadDumpFreq,
+              threadDumpAvgCPULimit = MetadataProcessor.config.threadDumpAvgCPULimit
+            )
+          )
+        )
       case _ =>
     }
     MetricHandler.timeMetric(processor.process(appCache), "Complete") match {
@@ -80,7 +90,7 @@ object Main extends GeneralMetadataLoggers {
         // any user-facing non-debug logging to be done internally
         logger.debug("Failure from scan process:", e)
         logger.debug("Skipping auth flow due to scan failure")
-        logger.error("Error in scanning, skipping auth flow : " + e.getMessage)
+        logger.error("Error in scanning, skipping auth flow : ", e.getMessage)
         MetricHandler.compileAndSend(appCache)
       // NOTE: Removed the finally as it will not be invoked after exit(1) is called in exception.
       // exit(1) is important to indicate scan failure to outer process.
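The grouping that FileLinkingMetadata.addToDataflowMap performs is easier to follow outside the diff. A minimal, self-contained sketch of the same logic (re-implemented here purely for illustration; DataflowMapSketch is a hypothetical driver object, and the List(List("a","b","c"), List("c","d")) input is the example from the scaladoc above):

import scala.collection.mutable

object DataflowMapSketch extends App {
  // Two dataflow paths over files a..d, as in the scaladoc example.
  val dataflowFiles = List(List("a", "b", "c"), List("c", "d"))
  val dataflowMap   = mutable.HashMap[String, mutable.HashSet[String]]()

  // Pair every file with the full path it occurs on, then merge the paths per file.
  val pairs = for {
    sublist <- dataflowFiles
    elem    <- sublist
  } yield (elem, sublist)

  pairs.groupBy(_._1).foreach { case (key, valuePairs) =>
    dataflowMap.getOrElseUpdate(key, mutable.HashSet[String]()).addAll(valuePairs.flatMap(_._2))
  }

  // Prints: a -> List(a, b, c); b -> List(a, b, c); c -> List(a, b, c, d); d -> List(c, d)
  dataflowMap.toSeq.sortBy(_._1).foreach { case (file, group) =>
    println(s"$file -> ${group.toList.sorted}")
  }
}

Because "c" appears on both paths, its entry accumulates the union of the two file lists, which is exactly the co-occurrence relation the pass later dumps for file linking.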
diff --git a/src/main/scala/ai/privado/entrypoint/MetadataProcessor.scala b/src/main/scala/ai/privado/entrypoint/MetadataProcessor.scala index b83262b6f..9faaa260f 100644 --- a/src/main/scala/ai/privado/entrypoint/MetadataProcessor.scala +++ b/src/main/scala/ai/privado/entrypoint/MetadataProcessor.scala @@ -1,26 +1,130 @@ package ai.privado.entrypoint -import ai.privado.cache.AppCache +import ai.privado.cache.{ + AppCache, + AuditCache, + DataFlowCache, + DatabaseDetailsCache, + FileLinkingMetadata, + PropertyFilterCache, + S3DatabaseDetailsCache +} +import ai.privado.languageEngine.javascript.processor.JavascriptBaseCPGProcessor +import ai.privado.languageEngine.python.processor.PythonBaseCPGProcessor import ai.privado.metadata.SystemInfo +import ai.privado.metric.MetricHandler import ai.privado.model.Constants +import ai.privado.model.Language.UNKNOWN +import ai.privado.model.* +import better.files.File +import io.circe.Json +import io.joern.console.cpgcreation.guessLanguage +import ai.privado.entrypoint.MetadataProcessor.statsRecorder import scala.util.{Failure, Success, Try} -object MetadataProcessor extends CommandProcessor { +object MetadataProcessor extends CommandProcessor with RuleProcessor { + + private val auditCache = new AuditCache + private val s3DatabaseDetailsCache = new S3DatabaseDetailsCache + private val propertyFilterCache = new PropertyFilterCache() + private val databaseDetailsCache = new DatabaseDetailsCache() + private val fileLinkingMetadata = new FileLinkingMetadata() + + def getDataflowCache: DataFlowCache = { + new DataFlowCache(config, auditCache) + } override def process(appCache: AppCache): Either[String, Unit] = { - def generateMetadata(): SystemInfo = { - val systemInfo = SystemInfo.getInfo - SystemInfo.dumpInfoToFile(config.sourceLocation.head, Constants.systemInfoFileName, systemInfo) - systemInfo + if (config.isDeltaFileScan) { + processCpg(appCache) + } else { + Try(generateMetadata()) match + case Failure(exception) => + println(s"Exception when processing metadata command : ${exception.toString}") + Left(exception.toString) + case Success(systemInfo) => Right(systemInfo) + } + } + + def generateMetadata(): SystemInfo = { + val systemInfo = SystemInfo.getInfo + SystemInfo.dumpInfoToFile(config.sourceLocation.head, Constants.systemInfoFileName, systemInfo) + systemInfo + } + + private def processCpg(appCache: AppCache): Either[String, Unit] = { + val sourceRepoLocation = File(config.sourceLocation.head).path.toAbsolutePath.toString.stripSuffix("/") + val excludeFileRegex = config.excludeFileRegex + // Setting up the application cache + appCache.init(sourceRepoLocation, excludeFileRegex = excludeFileRegex) + statsRecorder.initiateNewStage("Language detection") + val languageDetected = if (config.forceLanguage == UNKNOWN) { + val langDect = Try(guessLanguage(sourceRepoLocation)) + statsRecorder.endLastStage() + Language.withJoernLangName(langDect) + } else { + statsRecorder.justLogMessage("Language forced ...") + statsRecorder.endLastStage() + config.forceLanguage } + MetricHandler.metricsData("language") = Json.fromString(languageDetected.toString) - Try(generateMetadata()) match - case Failure(exception) => - println(s"Exception when processing metadata command : ${exception.toString}") - Left(exception.toString) - case Success(systemInfo) => Right(systemInfo) + languageDetected match { + case Language.JAVASCRIPT => + statsRecorder.justLogMessage("Detected language 'JavaScript'") + new JavascriptBaseCPGProcessor( + 
getProcessedRule(Set(Language.JAVASCRIPT), appCache, statsRecorder, config), + this.config, + sourceRepoLocation, + dataFlowCache = getDataflowCache, + AuditCache(), + S3DatabaseDetailsCache(), + appCache, + statsRecorder = statsRecorder, + databaseDetailsCache = databaseDetailsCache, + propertyFilterCache = propertyFilterCache, + fileLinkingMetadata = fileLinkingMetadata + ).processCpg() + case Language.PYTHON => + statsRecorder.justLogMessage("Detected language 'Python'") + new PythonBaseCPGProcessor( + getProcessedRule(Set(Language.PYTHON), appCache, statsRecorder, config), + this.config, + sourceRepoLocation, + dataFlowCache = getDataflowCache, + auditCache, + s3DatabaseDetailsCache, + appCache, + propertyFilterCache = propertyFilterCache, + databaseDetailsCache = databaseDetailsCache, + statsRecorder = statsRecorder, + fileLinkingMetadata = fileLinkingMetadata + ).processCpg() + case _ => + println("language not supported yet..") + statsRecorder.justLogMessage("Language not detected, force scanning using Javascript engine") + new JavascriptBaseCPGProcessor( + getProcessedRule(Set(Language.JAVASCRIPT), appCache, statsRecorder, config), + this.config, + sourceRepoLocation, + dataFlowCache = getDataflowCache, + AuditCache(), + S3DatabaseDetailsCache(), + appCache, + statsRecorder = statsRecorder, + databaseDetailsCache = databaseDetailsCache, + propertyFilterCache = propertyFilterCache, + fileLinkingMetadata = fileLinkingMetadata + ).processCpg() + } match { + case Left(err: String) => Left(err) + case _ => + Right( + () + ) // Ignore the result as not needed for further step, and due to discrepency in output for New and old frontends + } } } diff --git a/src/main/scala/ai/privado/entrypoint/RuleProcessor.scala b/src/main/scala/ai/privado/entrypoint/RuleProcessor.scala new file mode 100644 index 000000000..db6b3a53f --- /dev/null +++ b/src/main/scala/ai/privado/entrypoint/RuleProcessor.scala @@ -0,0 +1,361 @@ +package ai.privado.entrypoint + +import ai.privado.cache.{AppCache, RuleCache} +import ai.privado.metric.MetricHandler +import ai.privado.model.* +import ai.privado.model.Language.Language +import ai.privado.rulevalidator.YamlFileValidator +import ai.privado.utility.StatsRecorder +import better.files.File +import org.slf4j.LoggerFactory + +import scala.collection.parallel.CollectionConverters.ImmutableIterableIsParallelizable +import scala.sys.exit +import io.circe.yaml.parser +import ai.privado.utility.Utilities.{isValidDEDRule, isValidRule} +import io.circe.Json + +trait RuleProcessor { + + private val logger = LoggerFactory.getLogger(this.getClass) + + def getEmptyConfigAndRule: ConfigAndRules = + ConfigAndRules( + List[RuleInfo](), + List[RuleInfo](), + List[RuleInfo](), + List[PolicyOrThreat](), + List[PolicyOrThreat](), + List[RuleInfo](), + List[Semantic](), + List[RuleInfo](), + List[SystemConfig](), + List[RuleInfo](), + List[RuleInfo](), + List[DEDRuleInfo]() + ) + + /** Helper function to process rule for a language + * + * @param lang + * @return + * processed rules + */ + def getProcessedRule( + lang: Set[Language], + appCache: AppCache, + statsRecorder: StatsRecorder, + config: PrivadoInput + ): RuleCache = { + appCache.repoLanguage = + lang.head // we are caching the repo language here, and we will use this to get the repo's lang + val ruleCache = new RuleCache() + val processedRules = processRules(lang, ruleCache, appCache, statsRecorder, config) + ruleCache.setRule(processedRules) + ruleCache + } + + def parseRules(rulesPath: String, lang: Set[Language], 
isExternal: Boolean = false): ConfigAndRules = { + logger.trace(s"parsing rules from -> '$rulesPath'") + val ir: File = { + // e.g. rulesPath = /home/pandurang/projects/rules-home/ + try File(rulesPath) + catch { + case ex: Throwable => + logger.debug("File error: ", ex) + logger.error(s"Exception while processing rules on path $rulesPath") + exit(1) + } + } + + def filterByLang(rule: RuleInfo): Boolean = + lang.contains(rule.language) || rule.language == Language.DEFAULT || rule.language == Language.UNKNOWN + + def filterDEDByLang(rule: DEDRuleInfo): Boolean = + lang.contains(rule.language) || rule.language == Language.DEFAULT || rule.language == Language.UNKNOWN + + def filterSemanticByLang(rule: Semantic): Boolean = + lang.contains(rule.language) || rule.language == Language.DEFAULT || rule.language == Language.UNKNOWN + + def filterSystemConfigByLang(rule: SystemConfig): Boolean = + lang.contains(rule.language) || rule.language == Language.DEFAULT || rule.language == Language.UNKNOWN + + val parsedRules = + try + ir.listRecursively.toList.par + .filter(f => + ((f.extension(toLowerCase = true).toString.contains(".yaml") || + f.extension(toLowerCase = true).toString.contains(".yml")) && + YamlFileValidator.isValidRuleFile(f, ir)) + ) + .map(file => { + // e.g. fullPath = /home/pandurang/projects/rules-home/rules/sources/accounts.yaml + val fullPath = file.pathAsString + logger.trace(s"parsing -> '$fullPath'") + // e.g. relPath = rules/sources/accounts + val relPath = fullPath.substring(ir.pathAsString.length + 1).split("\\.").head + val pathTree = relPath.split("/") + parser.parse(file.contentAsString) match { + case Right(json) => + import ai.privado.model.CirceEnDe.* + json.as[ConfigAndRules] match { + case Right(configAndRules: ConfigAndRules) => + configAndRules.copy( + exclusions = configAndRules.exclusions + .map(x => + x.copy( + file = fullPath, + categoryTree = pathTree, + language = Language.withNameWithDefault(pathTree.last) + ) + ) + .filter(filterByLang), + sources = configAndRules.sources + .filter(rule => isValidRule(rule.combinedRulePattern, rule.id, fullPath)) + .map(x => + x.copy( + file = fullPath, + catLevelOne = CatLevelOne.withNameWithDefault(pathTree.apply(1)), + categoryTree = pathTree, + language = Language.withNameWithDefault(pathTree.last), + nodeType = NodeType.REGULAR, + isExternal = isExternal + ) + ) + .filter(filterByLang), + sinks = configAndRules.sinks + .filter(rule => isValidRule(rule.combinedRulePattern, rule.id, fullPath)) + .map(x => + x.copy( + file = fullPath, + catLevelOne = CatLevelOne.withNameWithDefault(pathTree.apply(1)), + catLevelTwo = pathTree.apply(2), + categoryTree = pathTree, + language = Language.withNameWithDefault(pathTree.last), + nodeType = NodeType.withNameWithDefault(pathTree.apply(3)), + isExternal = isExternal + ) + ) + .filter(filterByLang), + collections = configAndRules.collections + .filter(rule => isValidRule(rule.combinedRulePattern, rule.id, fullPath)) + .map(x => + x.copy( + file = fullPath, + catLevelOne = CatLevelOne.withNameWithDefault(pathTree.apply(1)), + catLevelTwo = pathTree.apply(2), + categoryTree = pathTree, + nodeType = NodeType.REGULAR, + isExternal = isExternal + ) + ) + .filter(filterByLang), + policies = configAndRules.policies.map(x => x.copy(file = fullPath, categoryTree = pathTree)), + semantics = configAndRules.semantics + .map(x => + x.copy( + file = fullPath, + categoryTree = pathTree, + language = Language.withNameWithDefault(pathTree.last) + ) + ) + .filter(filterSemanticByLang), + sinkSkipList = 
configAndRules.sinkSkipList + .map(x => + x.copy( + file = fullPath, + categoryTree = pathTree, + language = Language.withNameWithDefault(pathTree.last) + ) + ) + .filter(filterByLang), + systemConfig = configAndRules.systemConfig + .map(x => + x.copy( + file = fullPath, + categoryTree = pathTree, + language = Language.withNameWithDefault(pathTree.last) + ) + ) + .filter(filterSystemConfigByLang), + auditConfig = configAndRules.auditConfig + .map(x => + x.copy( + file = fullPath, + categoryTree = pathTree, + catLevelTwo = pathTree.apply(2), + language = Language.withNameWithDefault(pathTree.last) + ) + ) + .filter(filterByLang), + inferences = configAndRules.inferences + .filter(rule => isValidRule(rule.combinedRulePattern, rule.id, fullPath)) + .map(x => + x.copy( + file = fullPath, + catLevelOne = CatLevelOne.INFERENCES, + catLevelTwo = pathTree.apply(2), + categoryTree = pathTree, + language = Language.withNameWithDefault(pathTree.last), + nodeType = NodeType.withNameWithDefault(pathTree.apply(3)), + isExternal = isExternal + ) + ) + .filter(filterByLang), + dedRules = configAndRules.dedRules + .filter(rule => isValidDEDRule(rule)) + .map(x => + x.copy( + file = fullPath, + catLevelOne = CatLevelOne.DED, + catLevelTwo = pathTree.apply(1), + categoryTree = pathTree, + language = Language.withNameWithDefault(pathTree.last), + nodeType = NodeType.withNameWithDefault(pathTree.apply(2)), + isExternal = isExternal + ) + ) + .filter(filterDEDByLang) + ) + case Left(error) => + logger.error("Error while parsing this file -> '" + fullPath) + logger.error("ERROR : ", error) + getEmptyConfigAndRule + } + case Left(error) => + logger.error("Error while parsing this file -> '" + fullPath) + logger.error("ERROR : ", error) + getEmptyConfigAndRule + } + }) + .foldLeft(getEmptyConfigAndRule)((a, b) => + a.copy( + sources = a.sources ++ b.sources, + sinks = a.sinks ++ b.sinks, + collections = a.collections ++ b.collections, + policies = a.policies ++ b.policies, + exclusions = a.exclusions ++ b.exclusions, + threats = a.threats ++ b.threats, + semantics = a.semantics ++ b.semantics, + sinkSkipList = a.sinkSkipList ++ b.sinkSkipList, + systemConfig = a.systemConfig ++ b.systemConfig, + auditConfig = a.auditConfig ++ b.auditConfig, + inferences = a.inferences ++ b.inferences, + dedRules = a.dedRules ++ b.dedRules + ) + ) + catch { + case ex: Throwable => + logger.error("File error: ", ex) + logger.error(s"Rules path $rulesPath is not accessible") + exit(1) + } + parsedRules + } + + def mergePatterns(ruleInfoList: List[RuleInfo]): List[RuleInfo] = { + ruleInfoList + .groupBy(_.id) + .map { case (_, item) => + val combinedPatterns = item.flatMap(_.patterns) + item.head.copy(patterns = combinedPatterns) + } + .toList + } + + def processRules( + lang: Set[Language], + ruleCache: RuleCache, + appCache: AppCache, + statsRecorder: StatsRecorder, + config: PrivadoInput + ): ConfigAndRules = { + statsRecorder.initiateNewStage("Processing rules") + var internalConfigAndRules = getEmptyConfigAndRule + if (!config.ignoreInternalRules) { + try { + appCache.privadoVersionMain = File((s"${config.internalConfigPath.head}/version.txt")).contentAsString + } catch { + case _: Exception => + appCache.privadoVersionMain = Constants.notDetected + } + println(s"Privado Main Version: ${appCache.privadoVersionMain}") + internalConfigAndRules = parseRules(config.internalConfigPath.head, lang) + ruleCache.setInternalRules(internalConfigAndRules) + } + var externalConfigAndRules = getEmptyConfigAndRule + if 
(config.externalConfigPath.nonEmpty) { + externalConfigAndRules = parseRules(config.externalConfigPath.head, lang, isExternal = true) + } + if (appCache.excludeFileRegex.isDefined && appCache.excludeFileRegex.get.nonEmpty) { + val excludeFileRegexRule = RuleInfo( + "PrivadoInput.Exclusion", + "Command Line Exclusion Rule", + "", + FilterProperty.CODE, + Array.empty, + List(appCache.excludeFileRegex.get) + ) + externalConfigAndRules = + externalConfigAndRules.copy(exclusions = externalConfigAndRules.exclusions.appended(excludeFileRegexRule)) + } + /* + * NOTE: We want external rules to override internal ones when their ids collide. + * We concatenate the two lists (external first, then internal) and take the distinct elements by id; + * elements from the first collection are kept and duplicates from the second are discarded. + * + * e.g. + * val sources = externalRules.sources ++ internalRules.sources + * sources.distinctBy(_.id) - this returns the list with duplicate ids removed. + * In case of duplicates it keeps the elements from "externalRules.sources". + * We have not verified the library internals; we came to this conclusion by testing a few samples + * (see the sketch below). + */ + val exclusions = externalConfigAndRules.exclusions ++ internalConfigAndRules.exclusions + val sources = externalConfigAndRules.sources ++ internalConfigAndRules.sources + val sinks = externalConfigAndRules.sinks ++ internalConfigAndRules.sinks + val collections = externalConfigAndRules.collections ++ internalConfigAndRules.collections + val policies = externalConfigAndRules.policies ++ internalConfigAndRules.policies + val threats = externalConfigAndRules.threats ++ internalConfigAndRules.threats + val semantics = externalConfigAndRules.semantics ++ internalConfigAndRules.semantics + val sinkSkipList = externalConfigAndRules.sinkSkipList ++ internalConfigAndRules.sinkSkipList + val systemConfig = externalConfigAndRules.systemConfig ++ internalConfigAndRules.systemConfig + val auditConfig = externalConfigAndRules.auditConfig ++ internalConfigAndRules.auditConfig + val inferences = externalConfigAndRules.inferences ++ internalConfigAndRules.inferences + val dedRules = externalConfigAndRules.dedRules ++ internalConfigAndRules.dedRules + val mergedRules = + ConfigAndRules( + sources = mergePatterns(sources), + sinks = mergePatterns(sinks), + collections = mergePatterns(collections), + policies = policies.distinctBy(_.id), + exclusions = mergePatterns(exclusions), + threats = threats.distinctBy(_.id), + semantics = semantics.distinctBy(_.signature), + sinkSkipList = sinkSkipList.distinctBy(_.id), + systemConfig = systemConfig, + auditConfig = auditConfig.distinctBy(_.id), + inferences = mergePatterns(inferences), + dedRules = dedRules + ) + logger.trace(mergedRules.toString) + statsRecorder.justLogMessage(s"- Configuration parsed...") + + ruleCache.internalPolicies.addAll(internalConfigAndRules.policies.map(policy => (policy.id))) + ruleCache.internalPolicies.addAll(internalConfigAndRules.threats.map(threat => (threat.id))) + MetricHandler.metricsData("noOfRulesUsed") = { + Json.fromInt( + mergedRules.sources.size + + mergedRules.sinks.size + + mergedRules.collections.size + + mergedRules.policies.size + + mergedRules.exclusions.size + + mergedRules.auditConfig.size + + mergedRules.inferences.size + + mergedRules.dedRules.size + ) + } + statsRecorder.endLastStage() + mergedRules + } + +} diff --git a/src/main/scala/ai/privado/entrypoint/ScanProcessor.scala b/src/main/scala/ai/privado/entrypoint/ScanProcessor.scala
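The override relies on `List.distinctBy` keeping the first occurrence of each key, while `mergePatterns` instead concatenates the pattern lists of colliding ids. A minimal sketch of both behaviours, using a simplified `Rule` stand-in for `RuleInfo` (the real class carries many more fields):

```scala
// Sketch only: `Rule` stands in for RuleInfo.
final case class Rule(id: String, patterns: List[String])

val internal = List(Rule("Data.Email", List("email")), Rule("Data.Phone", List("phone")))
val external = List(Rule("Data.Email", List("e[-_]?mail")))

// distinctBy keeps the first element seen per key, so with external placed
// first the external definition of Data.Email wins.
val overridden = (external ++ internal).distinctBy(_.id)
// List(Rule(Data.Email, List(e[-_]?mail)), Rule(Data.Phone, List(phone)))

// mergePatterns (same shape as in the patch) concatenates the patterns of
// colliding ids and keeps the remaining fields of the first occurrence.
def mergePatterns(rules: List[Rule]): List[Rule] =
  rules
    .groupBy(_.id)
    .map { case (_, group) => group.head.copy(patterns = group.flatMap(_.patterns)) }
    .toList

val merged = mergePatterns(external ++ internal)
// contains Rule(Data.Email, List(e[-_]?mail, email)) and Rule(Data.Phone, List(phone))
```

Because the external list is placed first, both the `distinctBy` winner and the non-pattern fields kept by `mergePatterns` (via `group.head`) come from the external rules.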
index df7dd1b02..99fcdfe4d 100644 --- a/src/main/scala/ai/privado/entrypoint/ScanProcessor.scala +++ b/src/main/scala/ai/privado/entrypoint/ScanProcessor.scala @@ -38,332 +38,20 @@ import ai.privado.metadata.SystemInfo import ai.privado.metric.MetricHandler import ai.privado.model.* import ai.privado.model.Language.{Language, UNKNOWN} -import ai.privado.rulevalidator.YamlFileValidator -import ai.privado.utility.Utilities.{isValidDEDRule, isValidRule} import ai.privado.utility.StatsRecorder import better.files.File import io.circe.Json -import io.circe.yaml.parser import io.joern.console.cpgcreation.guessLanguage import io.joern.x2cpg.SourceFiles -import io.shiftleft.codepropertygraph.generated.Languages import org.slf4j.LoggerFactory import privado_core.BuildInfo -import java.util.Calendar -import scala.collection.parallel.CollectionConverters.ImmutableIterableIsParallelizable import scala.sys.exit -import scala.util.{Failure, Success, Try} +import scala.util.{Try} -object ScanProcessor extends CommandProcessor { +object ScanProcessor extends CommandProcessor with RuleProcessor { private val logger = LoggerFactory.getLogger(this.getClass) - def getEmptyConfigAndRule: ConfigAndRules = - ConfigAndRules( - List[RuleInfo](), - List[RuleInfo](), - List[RuleInfo](), - List[PolicyOrThreat](), - List[PolicyOrThreat](), - List[RuleInfo](), - List[Semantic](), - List[RuleInfo](), - List[SystemConfig](), - List[RuleInfo](), - List[RuleInfo](), - List[DEDRuleInfo]() - ) - - def parseRules(rulesPath: String, lang: Set[Language], isExternal: Boolean = false): ConfigAndRules = { - logger.trace(s"parsing rules from -> '$rulesPath'") - val ir: File = { - // e.g. rulesPath = /home/pandurang/projects/rules-home/ - try File(rulesPath) - catch { - case ex: Throwable => - logger.debug("File error: ", ex) - logger.error(s"Exception while processing rules on path $rulesPath") - exit(1) - } - } - - def filterByLang(rule: RuleInfo): Boolean = - lang.contains(rule.language) || rule.language == Language.DEFAULT || rule.language == Language.UNKNOWN - def filterDEDByLang(rule: DEDRuleInfo): Boolean = - lang.contains(rule.language) || rule.language == Language.DEFAULT || rule.language == Language.UNKNOWN - def filterSemanticByLang(rule: Semantic): Boolean = - lang.contains(rule.language) || rule.language == Language.DEFAULT || rule.language == Language.UNKNOWN - def filterSystemConfigByLang(rule: SystemConfig): Boolean = - lang.contains(rule.language) || rule.language == Language.DEFAULT || rule.language == Language.UNKNOWN - val parsedRules = - try - ir.listRecursively.toList.par - .filter(f => - ((f.extension(toLowerCase = true).toString.contains(".yaml") || - f.extension(toLowerCase = true).toString.contains(".yml")) && - YamlFileValidator.isValidRuleFile(f, ir)) - ) - .map(file => { - // e.g. fullPath = /home/pandurang/projects/rules-home/rules/sources/accounts.yaml - val fullPath = file.pathAsString - logger.trace(s"parsing -> '$fullPath'") - // e.g. 
relPath = rules/sources/accounts - val relPath = fullPath.substring(ir.pathAsString.length + 1).split("\\.").head - val pathTree = relPath.split("/") - parser.parse(file.contentAsString) match { - case Right(json) => - import ai.privado.model.CirceEnDe.* - json.as[ConfigAndRules] match { - case Right(configAndRules: ConfigAndRules) => - configAndRules.copy( - exclusions = configAndRules.exclusions - .map(x => - x.copy( - file = fullPath, - categoryTree = pathTree, - language = Language.withNameWithDefault(pathTree.last) - ) - ) - .filter(filterByLang), - sources = configAndRules.sources - .filter(rule => isValidRule(rule.combinedRulePattern, rule.id, fullPath)) - .map(x => - x.copy( - file = fullPath, - catLevelOne = CatLevelOne.withNameWithDefault(pathTree.apply(1)), - categoryTree = pathTree, - language = Language.withNameWithDefault(pathTree.last), - nodeType = NodeType.REGULAR, - isExternal = isExternal - ) - ) - .filter(filterByLang), - sinks = configAndRules.sinks - .filter(rule => isValidRule(rule.combinedRulePattern, rule.id, fullPath)) - .map(x => - x.copy( - file = fullPath, - catLevelOne = CatLevelOne.withNameWithDefault(pathTree.apply(1)), - catLevelTwo = pathTree.apply(2), - categoryTree = pathTree, - language = Language.withNameWithDefault(pathTree.last), - nodeType = NodeType.withNameWithDefault(pathTree.apply(3)), - isExternal = isExternal - ) - ) - .filter(filterByLang), - collections = configAndRules.collections - .filter(rule => isValidRule(rule.combinedRulePattern, rule.id, fullPath)) - .map(x => - x.copy( - file = fullPath, - catLevelOne = CatLevelOne.withNameWithDefault(pathTree.apply(1)), - catLevelTwo = pathTree.apply(2), - categoryTree = pathTree, - nodeType = NodeType.REGULAR, - isExternal = isExternal - ) - ) - .filter(filterByLang), - policies = configAndRules.policies.map(x => x.copy(file = fullPath, categoryTree = pathTree)), - semantics = configAndRules.semantics - .map(x => - x.copy( - file = fullPath, - categoryTree = pathTree, - language = Language.withNameWithDefault(pathTree.last) - ) - ) - .filter(filterSemanticByLang), - sinkSkipList = configAndRules.sinkSkipList - .map(x => - x.copy( - file = fullPath, - categoryTree = pathTree, - language = Language.withNameWithDefault(pathTree.last) - ) - ) - .filter(filterByLang), - systemConfig = configAndRules.systemConfig - .map(x => - x.copy( - file = fullPath, - categoryTree = pathTree, - language = Language.withNameWithDefault(pathTree.last) - ) - ) - .filter(filterSystemConfigByLang), - auditConfig = configAndRules.auditConfig - .map(x => - x.copy( - file = fullPath, - categoryTree = pathTree, - catLevelTwo = pathTree.apply(2), - language = Language.withNameWithDefault(pathTree.last) - ) - ) - .filter(filterByLang), - inferences = configAndRules.inferences - .filter(rule => isValidRule(rule.combinedRulePattern, rule.id, fullPath)) - .map(x => - x.copy( - file = fullPath, - catLevelOne = CatLevelOne.INFERENCES, - catLevelTwo = pathTree.apply(2), - categoryTree = pathTree, - language = Language.withNameWithDefault(pathTree.last), - nodeType = NodeType.withNameWithDefault(pathTree.apply(3)), - isExternal = isExternal - ) - ) - .filter(filterByLang), - dedRules = configAndRules.dedRules - .filter(rule => isValidDEDRule(rule)) - .map(x => - x.copy( - file = fullPath, - catLevelOne = CatLevelOne.DED, - catLevelTwo = pathTree.apply(1), - categoryTree = pathTree, - language = Language.withNameWithDefault(pathTree.last), - nodeType = NodeType.withNameWithDefault(pathTree.apply(2)), - isExternal = isExternal - ) - ) - 
.filter(filterDEDByLang) - ) - case Left(error) => - logger.error("Error while parsing this file -> '" + fullPath) - logger.error("ERROR : ", error) - getEmptyConfigAndRule - } - case Left(error) => - logger.error("Error while parsing this file -> '" + fullPath) - logger.error("ERROR : ", error) - getEmptyConfigAndRule - } - }) - .foldLeft(getEmptyConfigAndRule)((a, b) => - a.copy( - sources = a.sources ++ b.sources, - sinks = a.sinks ++ b.sinks, - collections = a.collections ++ b.collections, - policies = a.policies ++ b.policies, - exclusions = a.exclusions ++ b.exclusions, - threats = a.threats ++ b.threats, - semantics = a.semantics ++ b.semantics, - sinkSkipList = a.sinkSkipList ++ b.sinkSkipList, - systemConfig = a.systemConfig ++ b.systemConfig, - auditConfig = a.auditConfig ++ b.auditConfig, - inferences = a.inferences ++ b.inferences, - dedRules = a.dedRules ++ b.dedRules - ) - ) - catch { - case ex: Throwable => - logger.error("File error: ", ex) - logger.error(s"Rules path $rulesPath is not accessible") - exit(1) - } - parsedRules - } - def mergePatterns(ruleInfoList: List[RuleInfo]): List[RuleInfo] = { - ruleInfoList - .groupBy(_.id) - .map { case (_, item) => - val combinedPatterns = item.flatMap(_.patterns) - item.head.copy(patterns = combinedPatterns) - } - .toList - } - def processRules(lang: Set[Language], ruleCache: RuleCache, appCache: AppCache): ConfigAndRules = { - statsRecorder.initiateNewStage("Processing rules") - var internalConfigAndRules = getEmptyConfigAndRule - if (!config.ignoreInternalRules) { - try { - appCache.privadoVersionMain = File((s"${config.internalConfigPath.head}/version.txt")).contentAsString - } catch { - case _: Exception => - appCache.privadoVersionMain = Constants.notDetected - } - println(s"Privado Main Version: ${appCache.privadoVersionMain}") - internalConfigAndRules = parseRules(config.internalConfigPath.head, lang) - ruleCache.setInternalRules(internalConfigAndRules) - } - var externalConfigAndRules = getEmptyConfigAndRule - if (config.externalConfigPath.nonEmpty) { - externalConfigAndRules = parseRules(config.externalConfigPath.head, lang, isExternal = true) - } - if (appCache.excludeFileRegex.isDefined && appCache.excludeFileRegex.get.nonEmpty) { - val excludeFileRegexRule = RuleInfo( - "PrivadoInput.Exclusion", - "Command Line Exclusion Rule", - "", - FilterProperty.CODE, - Array.empty, - List(appCache.excludeFileRegex.get) - ) - externalConfigAndRules = - externalConfigAndRules.copy(exclusions = externalConfigAndRules.exclusions.appended(excludeFileRegexRule)) - } - /* - * NOTE: We want to override the external rules over internal in case of duplicates by id. - * While concatenating two lists (internal and external) and get the distinct list of elements. - * Elements from the first collection will be kept and elements from second collection will be discarded. - * - * e.g - * val sources = externalRules.sources ++ internalRules.sources - * sources.distinctBy(_.id) - this will return unique list of elements duplicated by id. - * In case of duplicates it will keep the elements from "externalRules.sources". - * We don't know the internal logic. We came to this conclusion based on testing few samples. 
- */ - val exclusions = externalConfigAndRules.exclusions ++ internalConfigAndRules.exclusions - val sources = externalConfigAndRules.sources ++ internalConfigAndRules.sources - val sinks = externalConfigAndRules.sinks ++ internalConfigAndRules.sinks - val collections = externalConfigAndRules.collections ++ internalConfigAndRules.collections - val policies = externalConfigAndRules.policies ++ internalConfigAndRules.policies - val threats = externalConfigAndRules.threats ++ internalConfigAndRules.threats - val semantics = externalConfigAndRules.semantics ++ internalConfigAndRules.semantics - val sinkSkipList = externalConfigAndRules.sinkSkipList ++ internalConfigAndRules.sinkSkipList - val systemConfig = externalConfigAndRules.systemConfig ++ internalConfigAndRules.systemConfig - val auditConfig = externalConfigAndRules.auditConfig ++ internalConfigAndRules.auditConfig - val inferences = externalConfigAndRules.inferences ++ internalConfigAndRules.inferences - val dedRules = externalConfigAndRules.dedRules ++ internalConfigAndRules.dedRules - val mergedRules = - ConfigAndRules( - sources = mergePatterns(sources), - sinks = mergePatterns(sinks), - collections = mergePatterns(collections), - policies = policies.distinctBy(_.id), - exclusions = mergePatterns(exclusions), - threats = threats.distinctBy(_.id), - semantics = semantics.distinctBy(_.signature), - sinkSkipList = sinkSkipList.distinctBy(_.id), - systemConfig = systemConfig, - auditConfig = auditConfig.distinctBy(_.id), - inferences = mergePatterns(inferences), - dedRules = dedRules - ) - logger.trace(mergedRules.toString) - statsRecorder.justLogMessage(s"- Configuration parsed...") - - ruleCache.internalPolicies.addAll(internalConfigAndRules.policies.map(policy => (policy.id))) - ruleCache.internalPolicies.addAll(internalConfigAndRules.threats.map(threat => (threat.id))) - MetricHandler.metricsData("noOfRulesUsed") = { - Json.fromInt( - mergedRules.sources.size + - mergedRules.sinks.size + - mergedRules.collections.size + - mergedRules.policies.size + - mergedRules.exclusions.size + - mergedRules.auditConfig.size + - mergedRules.inferences.size + - mergedRules.dedRules.size - ) - } - statsRecorder.endLastStage() - mergedRules - } override def process(appCache: AppCache): Either[String, Unit] = { println(s"Privado CLI Version: ${Environment.privadoVersionCli.getOrElse(Constants.notDetected)}") println(s"Privado Core Version: ${Environment.privadoVersionCore}") @@ -387,24 +75,11 @@ object ScanProcessor extends CommandProcessor { private val s3DatabaseDetailsCache = new S3DatabaseDetailsCache private val propertyFilterCache = new PropertyFilterCache() private val databaseDetailsCache = new DatabaseDetailsCache() + private val fileLinkingMetadata = new FileLinkingMetadata() def getDataflowCache: DataFlowCache = { new DataFlowCache(config, auditCache) } - /** Helper function to process rule for a language - * @param lang - * @return - * processed rules - */ - def getProcessedRule(lang: Set[Language], appCache: AppCache): RuleCache = { - appCache.repoLanguage = - lang.head // we are caching the repo language here, and we will use this to get the repo's lang - val ruleCache = new RuleCache() - val processedRules = processRules(lang, ruleCache, appCache) - ruleCache.setRule(processedRules) - ruleCache - } - private def processCpg(appCache: AppCache): Either[String, Unit] = { val sourceRepoLocation = File(config.sourceLocation.head).path.toAbsolutePath.toString.stripSuffix("/") val excludeFileRegex = config.excludeFileRegex @@ -425,7 +100,7 @@ 
object ScanProcessor extends CommandProcessor { languageDetected match { case Language.JAVA => statsRecorder.justLogMessage("Detected language 'Java'") - val kotlinPlusJavaRules = getProcessedRule(Set(Language.KOTLIN, Language.JAVA), appCache) + val kotlinPlusJavaRules = getProcessedRule(Set(Language.KOTLIN, Language.JAVA), appCache, statsRecorder, config) val filesWithKtExtension = SourceFiles.determine( sourceRepoLocation, Set(".kt"), @@ -433,7 +108,7 @@ object ScanProcessor extends CommandProcessor { ) if (filesWithKtExtension.isEmpty) JavaProcessor( - getProcessedRule(Set(Language.JAVA), appCache), + getProcessedRule(Set(Language.JAVA), appCache, statsRecorder, config), this.config, sourceRepoLocation, dataFlowCache = getDataflowCache, @@ -442,7 +117,8 @@ object ScanProcessor extends CommandProcessor { appCache, statsRecorder = statsRecorder, databaseDetailsCache = databaseDetailsCache, - propertyFilterCache = propertyFilterCache + propertyFilterCache = propertyFilterCache, + fileLinkingMetadata = fileLinkingMetadata ).processCpg() else KotlinProcessor( @@ -455,12 +131,13 @@ object ScanProcessor extends CommandProcessor { appCache, statsRecorder = statsRecorder, databaseDetailsCache = databaseDetailsCache, - propertyFilterCache = propertyFilterCache + propertyFilterCache = propertyFilterCache, + fileLinkingMetadata = fileLinkingMetadata ).processCpg() case Language.JAVASCRIPT => statsRecorder.justLogMessage("Detected language 'JavaScript'") new JavascriptProcessor( - getProcessedRule(Set(Language.JAVASCRIPT), appCache), + getProcessedRule(Set(Language.JAVASCRIPT), appCache, statsRecorder, config), this.config, sourceRepoLocation, dataFlowCache = getDataflowCache, @@ -469,12 +146,13 @@ object ScanProcessor extends CommandProcessor { appCache, statsRecorder = statsRecorder, databaseDetailsCache = databaseDetailsCache, - propertyFilterCache = propertyFilterCache + propertyFilterCache = propertyFilterCache, + fileLinkingMetadata = fileLinkingMetadata ).processCpg() case Language.PYTHON => statsRecorder.justLogMessage("Detected language 'Python'") new PythonProcessor( - getProcessedRule(Set(Language.PYTHON), appCache), + getProcessedRule(Set(Language.PYTHON), appCache, statsRecorder, config), this.config, sourceRepoLocation, dataFlowCache = getDataflowCache, @@ -483,12 +161,13 @@ object ScanProcessor extends CommandProcessor { appCache, propertyFilterCache = propertyFilterCache, databaseDetailsCache = databaseDetailsCache, - statsRecorder = statsRecorder + statsRecorder = statsRecorder, + fileLinkingMetadata = fileLinkingMetadata ).processCpg() case Language.RUBY => statsRecorder.justLogMessage("Detected language 'Ruby'") new RubyProcessor( - getProcessedRule(Set(Language.RUBY), appCache), + getProcessedRule(Set(Language.RUBY), appCache, statsRecorder, config), this.config, sourceRepoLocation, dataFlowCache = getDataflowCache, @@ -496,12 +175,13 @@ object ScanProcessor extends CommandProcessor { s3DatabaseDetailsCache, appCache, propertyFilterCache = propertyFilterCache, - statsRecorder = statsRecorder + statsRecorder = statsRecorder, + fileLinkingMetadata = fileLinkingMetadata ).processCpg() case Language.GO => statsRecorder.justLogMessage("Detected language 'Go'") new GoProcessor( - getProcessedRule(Set(Language.GO), appCache), + getProcessedRule(Set(Language.GO), appCache, statsRecorder, config), this.config, sourceRepoLocation, dataFlowCache = getDataflowCache, @@ -510,12 +190,13 @@ object ScanProcessor extends CommandProcessor { appCache, propertyFilterCache = propertyFilterCache, 
statsRecorder = statsRecorder, - databaseDetailsCache = databaseDetailsCache + databaseDetailsCache = databaseDetailsCache, + fileLinkingMetadata = fileLinkingMetadata ).processCpg() case Language.KOTLIN => statsRecorder.justLogMessage("Detected language 'Kotlin'") KotlinProcessor( - getProcessedRule(Set(Language.KOTLIN, Language.JAVA), appCache), + getProcessedRule(Set(Language.KOTLIN, Language.JAVA), appCache, statsRecorder, config), this.config, sourceRepoLocation, dataFlowCache = getDataflowCache, @@ -524,12 +205,13 @@ object ScanProcessor extends CommandProcessor { appCache, statsRecorder = statsRecorder, databaseDetailsCache = databaseDetailsCache, - propertyFilterCache = propertyFilterCache + propertyFilterCache = propertyFilterCache, + fileLinkingMetadata = fileLinkingMetadata ).processCpg() case Language.CSHARP => statsRecorder.justLogMessage("Detected language 'C#'") CSharpProcessor( - getProcessedRule(Set(Language.CSHARP), appCache), + getProcessedRule(Set(Language.CSHARP), appCache, statsRecorder, config), this.config, sourceRepoLocation, dataFlowCache = getDataflowCache, @@ -538,12 +220,13 @@ object ScanProcessor extends CommandProcessor { appCache, statsRecorder = statsRecorder, databaseDetailsCache = databaseDetailsCache, - propertyFilterCache = propertyFilterCache + propertyFilterCache = propertyFilterCache, + fileLinkingMetadata = fileLinkingMetadata ).processCpg() case Language.PHP => statsRecorder.justLogMessage("Detected language 'PHP'") PhpProcessor( - getProcessedRule(Set(Language.PHP), appCache), + getProcessedRule(Set(Language.PHP), appCache, statsRecorder, config), this.config, sourceRepoLocation, dataFlowCache = getDataflowCache, @@ -552,13 +235,14 @@ object ScanProcessor extends CommandProcessor { appCache, statsRecorder = statsRecorder, databaseDetailsCache = databaseDetailsCache, - propertyFilterCache = propertyFilterCache + propertyFilterCache = propertyFilterCache, + fileLinkingMetadata = fileLinkingMetadata ) .processCpg() case Language.C => statsRecorder.justLogMessage("Detected language 'C'") CProcessor( - getProcessedRule(Set(Language.C), appCache), + getProcessedRule(Set(Language.C), appCache, statsRecorder, config), this.config, sourceRepoLocation, dataFlowCache = getDataflowCache, @@ -567,11 +251,12 @@ object ScanProcessor extends CommandProcessor { appCache, statsRecorder = statsRecorder, databaseDetailsCache = databaseDetailsCache, - propertyFilterCache = propertyFilterCache + propertyFilterCache = propertyFilterCache, + fileLinkingMetadata = fileLinkingMetadata ) .processCpg() case _ => - processCpgWithDefaultProcessor(sourceRepoLocation, appCache, statsRecorder) + processCpgWithDefaultProcessor(sourceRepoLocation, appCache, statsRecorder, fileLinkingMetadata) } match { case Left(err: String) => Left(err) case _ => @@ -584,12 +269,13 @@ object ScanProcessor extends CommandProcessor { private def processCpgWithDefaultProcessor( sourceRepoLocation: String, appCache: AppCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ) = { MetricHandler.metricsData("language") = Json.fromString("default") statsRecorder.justLogMessage("Running scan with default processor.") DefaultProcessor( - getProcessedRule(Set(Language.UNKNOWN), appCache), + getProcessedRule(Set(Language.UNKNOWN), appCache, statsRecorder, config), this.config, sourceRepoLocation, getDataflowCache, @@ -598,7 +284,8 @@ object ScanProcessor extends CommandProcessor { appCache, statsRecorder = statsRecorder, databaseDetailsCache = 
databaseDetailsCache, - propertyFilterCache + propertyFilterCache = propertyFilterCache, + fileLinkingMetadata = fileLinkingMetadata ).processCpg() } diff --git a/src/main/scala/ai/privado/exporter/JSONExporter.scala b/src/main/scala/ai/privado/exporter/JSONExporter.scala index da6276504..2564e90d3 100644 --- a/src/main/scala/ai/privado/exporter/JSONExporter.scala +++ b/src/main/scala/ai/privado/exporter/JSONExporter.scala @@ -30,6 +30,7 @@ import ai.privado.cache.{ DataFlowCache, DatabaseDetailsCache, Environment, + FileLinkingMetadata, PropertyFilterCache, RuleCache, S3DatabaseDetailsCache, @@ -38,7 +39,7 @@ import ai.privado.cache.{ import ai.privado.entrypoint.PrivadoInput import ai.privado.languageEngine.default.NodeStarters import ai.privado.metric.MetricHandler -import ai.privado.model.Constants.{outputDirectoryName, value} +import ai.privado.model.Constants.{namespaceDependency, outputDirectoryName, value} import ai.privado.model.exporter.{ AndroidPermissionModel, CollectionModel, @@ -73,7 +74,7 @@ import scala.concurrent.* import scala.language.postfixOps import scala.util.{Failure, Success, Try} import io.shiftleft.semanticcpg.language.* - +import ai.privado.languageEngine.java.language.{NodeStarters, StepsForProperty} object JSONExporter { private val logger = LoggerFactory.getLogger(getClass) @@ -198,6 +199,55 @@ object JSONExporter { } } + def fileLinkingExport( + cpg: Cpg, + outputFileName: String, + repoPath: String, + fileLinkingMetadata: FileLinkingMetadata + ): Either[String, Unit] = { + logger.info("Initiated the file linking metadata exporter engine") + val output = mutable.LinkedHashMap[String, Json]() + try { + output.addOne(Constants.dataflowDependency -> fileLinkingMetadata.getDataflowMap.asJson) + val propertyAndUsedAt = cpg.property + .map(p => (p.file.name.headOption.getOrElse(""), p.start.usedAt.file.name.dedup.l)) + .groupBy(_._1) + .map(entry => (entry._1, entry._2.flatMap(_._2).distinct)) + .filter(entrySet => entrySet._2.nonEmpty) + + output.addOne(Constants.propertyDependency -> propertyAndUsedAt.asJson) + output.addOne(Constants.propertyFiles -> cpg.property.file.name.dedup.l.asJson) + + /** For Java the namespace works as expected; for languages like JS and Python we get the namespace as + * <`global`> for nearly all files. + * + * This reflects that these languages have no real notion of a namespace: files placed under the same folder + * are not visible to each other by default and have to be imported relatively. + */ + val namespaceToFileMapping = cpg.namespace + .map(n => (n.name, n.file.name.l)) + .groupBy(_._1) + .map(entrySet => (entrySet._1, entrySet._2.flatMap(_._2).distinct)) + + output.addOne(Constants.namespaceDependency -> namespaceToFileMapping.asJson) + + output.addOne(Constants.importDependency -> fileLinkingMetadata.getFileImportMap.asJson) + + val outputDir = File(s"$repoPath/$outputDirectoryName").createDirectoryIfNotExists() + val f = File(s"$repoPath/$outputDirectoryName/$outputFileName") + f.write(output.asJson.toString()) + logger.info("Shutting down file linking metadata exporter engine") + Right(()) + + } catch { + case ex: Exception => + println("Failed to export file linking metadata output") + logger.debug(ex.getStackTrace.mkString("\n")) + logger.debug("Failed to export file linking metadata output", ex) + Left(ex.toString) + } + } + def dataElementDiscoveryAuditFileExport( + outputFileName: String, + repoPath: String, diff --git a/src/main/scala/ai/privado/languageEngine/base/processor/BaseProcessor.scala
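Both `propertyAndUsedAt` and `namespaceToFileMapping` in `fileLinkingExport` above share one aggregation shape: pair each key with a list of file names, group by the key, then flatten and de-duplicate the lists (the trailing `filter`, applied only to the property map, drops keys that are used nowhere). The same shape on plain data, with illustrative file names and no CPG:

```scala
// Illustrative input: (property file, files that use it) pairs.
val pairs: List[(String, List[String])] = List(
  ("application.yaml", List("UserService.java")),
  ("application.yaml", List("UserService.java", "OrderService.java")),
  ("unused.properties", Nil)
)

val grouped: Map[String, List[String]] = pairs
  .groupBy(_._1)                                          // one entry per key
  .map { case (k, vs) => (k, vs.flatMap(_._2).distinct) } // flatten and dedup usages
  .filter { case (_, files) => files.nonEmpty }           // drop keys used nowhere

// grouped == Map("application.yaml" -> List("UserService.java", "OrderService.java"))
```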
b/src/main/scala/ai/privado/languageEngine/base/processor/BaseProcessor.scala index 75400040f..3076c85a7 100644 --- a/src/main/scala/ai/privado/languageEngine/base/processor/BaseProcessor.scala +++ b/src/main/scala/ai/privado/languageEngine/base/processor/BaseProcessor.scala @@ -24,6 +24,7 @@ import io.joern.x2cpg.X2CpgConfig import io.shiftleft.codepropertygraph.generated.Cpg import io.shiftleft.codepropertygraph.generated.nodes.ModuleDependency import io.shiftleft.passes.CpgPassBase +import io.shiftleft.semanticcpg.language.* import io.shiftleft.semanticcpg.layers.LayerCreatorContext import org.slf4j.{Logger, LoggerFactory} @@ -43,7 +44,8 @@ abstract class BaseProcessor( statsRecorder: StatsRecorder, returnClosedCpg: Boolean, databaseDetailsCache: DatabaseDetailsCache, - propertyFilterCache: PropertyFilterCache = new PropertyFilterCache() + propertyFilterCache: PropertyFilterCache = new PropertyFilterCache(), + fileLinkingMetadata: FileLinkingMetadata = new FileLinkingMetadata() ) { val logger: Logger = LoggerFactory.getLogger(getClass) @@ -132,6 +134,12 @@ abstract class BaseProcessor( val dataflowMap = Dataflow(cpg, statsRecorder).dataflow(privadoInput, ruleCache, dataFlowCache, auditCache, appCache) statsRecorder.endLastStage() + + if (privadoInput.fileLinkingReport) { + // Add dataflow data to FileLinkingMetadata + val dataflowFiles = dataflowMap.values.map(path => path.elements.flatMap(_.file.name).dedup.l).l + fileLinkingMetadata.addToDataflowMap(dataflowFiles) + } statsRecorder.justLogMessage(s"Processed final flows - ${dataFlowCache.getDataflowAfterDedup.size}") statsRecorder.initiateNewStage("Brewing result") @@ -171,6 +179,10 @@ abstract class BaseProcessor( case Left(err) => errorMsgs.addOne(err) case Right(_) => + fileLinkingReportExport(cpg) match + case Left(err) => errorMsgs.addOne(err) + case Right(_) => + applyJsonExport(cpg, taggerCache, dataflowMap, s3DatabaseDetailsCache, appCache) match case Left(err) => errorMsgs.addOne(err) @@ -301,6 +313,21 @@ abstract class BaseProcessor( Right(()) } + protected def fileLinkingReportExport(cpg: Cpg): Either[String, Unit] = { + if (privadoInput.fileLinkingReport) { + JSONExporter.fileLinkingExport(cpg, outputFileLinkingFileName, sourceRepoLocation, fileLinkingMetadata) match + case Left(err) => + MetricHandler.otherErrorsOrWarnings.addOne(err) + Left(err) + case Right(_) => + statsRecorder.justLogMessage( + s"Successfully exported file linking output to '${appCache.localScanPath}/$outputDirectoryName/$outputFileLinkingFileName' folder..." 
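`FileLinkingMetadata` itself is not shown in this hunk; judging only from the call sites (`addToDataflowMap`, `addToFileImportMap` and the two getters read by `fileLinkingExport`), it acts as a small accumulator shared between passes and the exporter. A hypothetical reconstruction, for orientation only; the real class may differ:

```scala
import scala.collection.mutable

// Hypothetical sketch inferred from call sites; not the actual implementation.
class FileLinkingMetadataSketch {
  private val dataflowFiles = mutable.ListBuffer.empty[List[String]]
  private val fileImports   = mutable.Map.empty[String, mutable.Set[String]]

  // BaseProcessor: record the list of files touched by each dataflow path.
  def addToDataflowMap(files: Iterable[List[String]]): Unit = synchronized {
    dataflowFiles ++= files
  }

  // FileImportMappingPassJS: record that `file` resolves an import to `resolved`.
  def addToFileImportMap(file: String, resolved: String): Unit = synchronized {
    fileImports.getOrElseUpdate(file, mutable.Set.empty) += resolved
  }

  def getDataflowMap: List[List[String]] = synchronized(dataflowFiles.toList)

  def getFileImportMap: Map[String, Set[String]] = synchronized {
    fileImports.map { case (k, v) => (k, v.toSet) }.toMap
  }
}
```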
+ ) + Right(()) + } else + Right(()) + } + protected def reportUnresolvedMethods(cpg: Cpg, lang: Language): Unit = { // Unresolved function report if (privadoInput.showUnresolvedFunctionsReport) { diff --git a/src/main/scala/ai/privado/languageEngine/c/processor/CProcessor.scala b/src/main/scala/ai/privado/languageEngine/c/processor/CProcessor.scala index 62f95f4f7..3e1f1212b 100644 --- a/src/main/scala/ai/privado/languageEngine/c/processor/CProcessor.scala +++ b/src/main/scala/ai/privado/languageEngine/c/processor/CProcessor.scala @@ -5,6 +5,7 @@ import ai.privado.cache.{ AuditCache, DataFlowCache, DatabaseDetailsCache, + FileLinkingMetadata, PropertyFilterCache, RuleCache, S3DatabaseDetailsCache, @@ -34,7 +35,8 @@ class CProcessor( statsRecorder: StatsRecorder, returnClosedCpg: Boolean = true, databaseDetailsCache: DatabaseDetailsCache = new DatabaseDetailsCache(), - propertyFilterCache: PropertyFilterCache = new PropertyFilterCache() + propertyFilterCache: PropertyFilterCache = new PropertyFilterCache(), + fileLinkingMetadata: FileLinkingMetadata = new FileLinkingMetadata() ) extends BaseProcessor( ruleCache, privadoInput, @@ -47,13 +49,23 @@ class CProcessor( statsRecorder, returnClosedCpg, databaseDetailsCache, - propertyFilterCache + propertyFilterCache, + fileLinkingMetadata ) { private val logger = LoggerFactory.getLogger(getClass) override def runPrivadoTagger(cpg: Cpg, taggerCache: TaggerCache): Unit = { - cpg.runTagger(ruleCache, taggerCache, privadoInput, dataFlowCache, appCache, databaseDetailsCache, statsRecorder) + cpg.runTagger( + ruleCache, + taggerCache, + privadoInput, + dataFlowCache, + appCache, + databaseDetailsCache, + statsRecorder, + fileLinkingMetadata + ) } override def processCpg(): Either[String, CpgWithOutputMap] = { diff --git a/src/main/scala/ai/privado/languageEngine/c/tagger/PrivadoTagger.scala b/src/main/scala/ai/privado/languageEngine/c/tagger/PrivadoTagger.scala index 34a37a680..8eac1ac0b 100644 --- a/src/main/scala/ai/privado/languageEngine/c/tagger/PrivadoTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/c/tagger/PrivadoTagger.scala @@ -1,6 +1,6 @@ package ai.privado.languageEngine.c.tagger -import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, RuleCache, TaggerCache} +import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, FileLinkingMetadata, RuleCache, TaggerCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.languageEngine.c.tagger.source.IdentifierTagger import ai.privado.tagger.PrivadoBaseTagger @@ -23,7 +23,8 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { dataFlowCache: DataFlowCache, appCache: AppCache, databaseDetailsCache: DatabaseDetailsCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ): Traversal[Tag] = { logger.info("Beginning tagging") diff --git a/src/main/scala/ai/privado/languageEngine/csharp/processor/CSharpProcessor.scala b/src/main/scala/ai/privado/languageEngine/csharp/processor/CSharpProcessor.scala index fb5722a8b..337b346c6 100644 --- a/src/main/scala/ai/privado/languageEngine/csharp/processor/CSharpProcessor.scala +++ b/src/main/scala/ai/privado/languageEngine/csharp/processor/CSharpProcessor.scala @@ -69,7 +69,8 @@ class CSharpProcessor( statsRecorder: StatsRecorder, returnClosedCpg: Boolean = true, databaseDetailsCache: DatabaseDetailsCache = new DatabaseDetailsCache(), - propertyFilterCache: PropertyFilterCache = new PropertyFilterCache() + propertyFilterCache: PropertyFilterCache = new 
PropertyFilterCache(), + fileLinkingMetadata: FileLinkingMetadata = new FileLinkingMetadata() ) extends BaseProcessor( ruleCache, privadoInput, @@ -82,7 +83,8 @@ class CSharpProcessor( statsRecorder, returnClosedCpg, databaseDetailsCache, - propertyFilterCache + propertyFilterCache, + fileLinkingMetadata ) { private val logger = LoggerFactory.getLogger(getClass) @@ -91,7 +93,16 @@ class CSharpProcessor( } override def runPrivadoTagger(cpg: Cpg, taggerCache: TaggerCache): Unit = { - cpg.runTagger(ruleCache, taggerCache, privadoInput, dataFlowCache, appCache, databaseDetailsCache, statsRecorder) + cpg.runTagger( + ruleCache, + taggerCache, + privadoInput, + dataFlowCache, + appCache, + databaseDetailsCache, + statsRecorder, + fileLinkingMetadata + ) } override def applyDataflowAndPostProcessingPasses(cpg: Cpg): Unit = { diff --git a/src/main/scala/ai/privado/languageEngine/csharp/tagger/PrivadoTagger.scala b/src/main/scala/ai/privado/languageEngine/csharp/tagger/PrivadoTagger.scala index 4a9edcb9b..b7c254470 100644 --- a/src/main/scala/ai/privado/languageEngine/csharp/tagger/PrivadoTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/csharp/tagger/PrivadoTagger.scala @@ -23,7 +23,7 @@ package ai.privado.languageEngine.csharp.tagger -import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, RuleCache, TaggerCache} +import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, FileLinkingMetadata, RuleCache, TaggerCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.languageEngine.csharp.tagger.collection.CollectionTagger import ai.privado.languageEngine.csharp.tagger.sink.{CSharpAPISinkTagger, CSharpAPITagger} @@ -48,7 +48,8 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { dataFlowCache: DataFlowCache, appCache: AppCache, databaseDetailsCache: DatabaseDetailsCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ): Traversal[Tag] = { logger.info("Beginning tagging") @@ -57,7 +58,7 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { new IdentifierTagger(cpg, rules, taggerCache).createAndApply() new SqlQueryTagger(cpg, rules).createAndApply() - CSharpAPISinkTagger.applyTagger(cpg, rules, privadoInputConfig, appCache, statsRecorder) + CSharpAPISinkTagger.applyTagger(cpg, rules, privadoInputConfig, appCache, statsRecorder, fileLinkingMetadata) new RegularSinkTagger(cpg, rules, databaseDetailsCache).createAndApply() new CollectionTagger(cpg, rules).createAndApply() diff --git a/src/main/scala/ai/privado/languageEngine/csharp/tagger/sink/CSharpAPISinkTagger.scala b/src/main/scala/ai/privado/languageEngine/csharp/tagger/sink/CSharpAPISinkTagger.scala index b50489710..d60cb2b30 100644 --- a/src/main/scala/ai/privado/languageEngine/csharp/tagger/sink/CSharpAPISinkTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/csharp/tagger/sink/CSharpAPISinkTagger.scala @@ -1,6 +1,6 @@ package ai.privado.languageEngine.csharp.tagger.sink -import ai.privado.cache.{AppCache, RuleCache} +import ai.privado.cache.{AppCache, FileLinkingMetadata, RuleCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.tagger.sink.api.{APISinkByMethodFullNameTagger, APISinkTagger} import ai.privado.utility.StatsRecorder @@ -13,12 +13,13 @@ object CSharpAPISinkTagger extends APISinkTagger { ruleCache: RuleCache, privadoInput: PrivadoInput, appCache: AppCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ): Unit = { - 
super.applyTagger(cpg, ruleCache, privadoInput, appCache, statsRecorder) + super.applyTagger(cpg, ruleCache, privadoInput, appCache, statsRecorder, fileLinkingMetadata) new APISinkByMethodFullNameTagger(cpg, ruleCache).createAndApply() - new CSharpAPITagger(cpg, ruleCache, privadoInput, appCache).createAndApply() + new CSharpAPITagger(cpg, ruleCache, privadoInput, appCache, fileLinkingMetadata).createAndApply() } } diff --git a/src/main/scala/ai/privado/languageEngine/csharp/tagger/sink/CSharpAPITagger.scala b/src/main/scala/ai/privado/languageEngine/csharp/tagger/sink/CSharpAPITagger.scala index a9b178f63..c5223a50f 100644 --- a/src/main/scala/ai/privado/languageEngine/csharp/tagger/sink/CSharpAPITagger.scala +++ b/src/main/scala/ai/privado/languageEngine/csharp/tagger/sink/CSharpAPITagger.scala @@ -22,7 +22,7 @@ */ package ai.privado.languageEngine.csharp.tagger.sink -import ai.privado.cache.{AppCache, RuleCache} +import ai.privado.cache.{AppCache, FileLinkingMetadata, RuleCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.metric.MetricHandler import ai.privado.tagger.sink.APITagger @@ -30,8 +30,13 @@ import io.circe.Json import io.shiftleft.codepropertygraph.generated.Cpg import org.slf4j.LoggerFactory -class CSharpAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput, appCache: AppCache) - extends APITagger(cpg, ruleCache, privadoInput, appCache) { +class CSharpAPITagger( + cpg: Cpg, + ruleCache: RuleCache, + privadoInput: PrivadoInput, + appCache: AppCache, + fileLinkingMetadata: FileLinkingMetadata +) extends APITagger(cpg, ruleCache, privadoInput, appCache, fileLinkingMetadata) { private val logger = LoggerFactory.getLogger(this.getClass) MetricHandler.metricsData("apiTaggerVersion") = Json.fromString("Common HTTP Libraries Used") diff --git a/src/main/scala/ai/privado/languageEngine/default/processor/DefaultProcessor.scala b/src/main/scala/ai/privado/languageEngine/default/processor/DefaultProcessor.scala index 2ab24094f..51d163bad 100644 --- a/src/main/scala/ai/privado/languageEngine/default/processor/DefaultProcessor.scala +++ b/src/main/scala/ai/privado/languageEngine/default/processor/DefaultProcessor.scala @@ -56,7 +56,8 @@ class DefaultProcessor( statsRecorder: StatsRecorder, returnClosedCpg: Boolean = true, databaseDetailsCache: DatabaseDetailsCache = new DatabaseDetailsCache(), - propertyFilterCache: PropertyFilterCache = new PropertyFilterCache() + propertyFilterCache: PropertyFilterCache = new PropertyFilterCache(), + fileLinkingMetadata: FileLinkingMetadata = new FileLinkingMetadata() ) extends BaseProcessor( ruleCache, privadoInput, @@ -69,7 +70,8 @@ class DefaultProcessor( statsRecorder, returnClosedCpg, databaseDetailsCache, - propertyFilterCache + propertyFilterCache, + fileLinkingMetadata ) { private val logger = LoggerFactory.getLogger(getClass) @@ -85,7 +87,16 @@ class DefaultProcessor( } override def runPrivadoTagger(cpg: Cpg, taggerCache: TaggerCache): Unit = { - cpg.runTagger(ruleCache, taggerCache, privadoInput, dataFlowCache, appCache, databaseDetailsCache, statsRecorder) + cpg.runTagger( + ruleCache, + taggerCache, + privadoInput, + dataFlowCache, + appCache, + databaseDetailsCache, + statsRecorder, + fileLinkingMetadata + ) } override def processCpg(): Either[String, CpgWithOutputMap] = { diff --git a/src/main/scala/ai/privado/languageEngine/default/tagger/PrivadoTagger.scala b/src/main/scala/ai/privado/languageEngine/default/tagger/PrivadoTagger.scala index 44e0024a2..de0da11ca 100644 --- 
a/src/main/scala/ai/privado/languageEngine/default/tagger/PrivadoTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/default/tagger/PrivadoTagger.scala @@ -1,6 +1,6 @@ package ai.privado.languageEngine.default.tagger -import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, RuleCache, TaggerCache} +import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, FileLinkingMetadata, RuleCache, TaggerCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.languageEngine.default.passes.HighTouchDataflow import ai.privado.tagger.source.SqlQueryTagger @@ -22,7 +22,8 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { dataFlowCache: DataFlowCache, appCache: AppCache, databaseDetailsCache: DatabaseDetailsCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ): Traversal[Tag] = { logger.info("Starting tagger") diff --git a/src/main/scala/ai/privado/languageEngine/go/processor/GoProcessor.scala b/src/main/scala/ai/privado/languageEngine/go/processor/GoProcessor.scala index c7b73e28b..a72521d8e 100644 --- a/src/main/scala/ai/privado/languageEngine/go/processor/GoProcessor.scala +++ b/src/main/scala/ai/privado/languageEngine/go/processor/GoProcessor.scala @@ -37,7 +37,8 @@ class GoProcessor( statsRecorder: StatsRecorder, returnClosedCpg: Boolean = true, databaseDetailsCache: DatabaseDetailsCache = new DatabaseDetailsCache(), - propertyFilterCache: PropertyFilterCache = new PropertyFilterCache() + propertyFilterCache: PropertyFilterCache = new PropertyFilterCache(), + fileLinkingMetadata: FileLinkingMetadata = new FileLinkingMetadata() ) extends BaseProcessor( ruleCache, privadoInput, @@ -50,7 +51,8 @@ class GoProcessor( statsRecorder, returnClosedCpg, databaseDetailsCache, - propertyFilterCache + propertyFilterCache, + fileLinkingMetadata ) { private val logger = LoggerFactory.getLogger(getClass) @@ -70,7 +72,16 @@ class GoProcessor( } override def runPrivadoTagger(cpg: Cpg, taggerCache: TaggerCache): Unit = { - cpg.runTagger(ruleCache, taggerCache, privadoInput, dataFlowCache, appCache, databaseDetailsCache, statsRecorder) + cpg.runTagger( + ruleCache, + taggerCache, + privadoInput, + dataFlowCache, + appCache, + databaseDetailsCache, + statsRecorder, + fileLinkingMetadata + ) } override def processCpg(): Either[String, CpgWithOutputMap] = { diff --git a/src/main/scala/ai/privado/languageEngine/go/tagger/PrivadoTagger.scala b/src/main/scala/ai/privado/languageEngine/go/tagger/PrivadoTagger.scala index 450a67611..47662583c 100644 --- a/src/main/scala/ai/privado/languageEngine/go/tagger/PrivadoTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/go/tagger/PrivadoTagger.scala @@ -4,7 +4,7 @@ import ai.privado.entrypoint.{PrivadoInput, ScanProcessor} import ai.privado.tagger.PrivadoBaseTagger import io.shiftleft.codepropertygraph.generated.Cpg import ai.privado.tagger.source.{DEDTagger, LiteralTagger, SqlQueryTagger} -import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, RuleCache, TaggerCache} +import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, FileLinkingMetadata, RuleCache, TaggerCache} import ai.privado.languageEngine.go.tagger.collection.CollectionTagger import org.slf4j.LoggerFactory import io.shiftleft.codepropertygraph.generated.nodes.Tag @@ -15,7 +15,7 @@ import ai.privado.languageEngine.go.tagger.config.GoDBConfigTagger import ai.privado.languageEngine.go.tagger.sink.{GoAPISinkTagger, GoAPITagger} import 
ai.privado.tagger.sink.RegularSinkTagger import ai.privado.utility.StatsRecorder -import ai.privado.utility.Utilities.{databaseURLPriority} +import ai.privado.utility.Utilities.databaseURLPriority class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { private val logger = LoggerFactory.getLogger(this.getClass) @@ -27,7 +27,8 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { dataFlowCache: DataFlowCache, appCache: AppCache, databaseDetailsCache: DatabaseDetailsCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ): Traversal[Tag] = { logger.info("Starting tagging") @@ -42,7 +43,7 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { new GoDBConfigTagger(cpg, databaseDetailsCache).createAndApply() - GoAPISinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig, appCache, statsRecorder) + GoAPISinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig, appCache, statsRecorder, fileLinkingMetadata) new RegularSinkTagger(cpg, ruleCache, databaseDetailsCache).createAndApply() diff --git a/src/main/scala/ai/privado/languageEngine/go/tagger/sink/GoAPISinkTagger.scala b/src/main/scala/ai/privado/languageEngine/go/tagger/sink/GoAPISinkTagger.scala index d3a71deb5..5b1d844ea 100644 --- a/src/main/scala/ai/privado/languageEngine/go/tagger/sink/GoAPISinkTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/go/tagger/sink/GoAPISinkTagger.scala @@ -1,6 +1,6 @@ package ai.privado.languageEngine.go.tagger.sink -import ai.privado.cache.{AppCache, RuleCache} +import ai.privado.cache.{AppCache, FileLinkingMetadata, RuleCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.tagger.sink.api.{APISinkByMethodFullNameTagger, APISinkTagger} import ai.privado.utility.StatsRecorder @@ -13,12 +13,13 @@ object GoAPISinkTagger extends APISinkTagger { ruleCache: RuleCache, privadoInput: PrivadoInput, appCache: AppCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ): Unit = { - super.applyTagger(cpg, ruleCache, privadoInput, appCache, statsRecorder) + super.applyTagger(cpg, ruleCache, privadoInput, appCache, statsRecorder, fileLinkingMetadata) new APISinkByMethodFullNameTagger(cpg, ruleCache).createAndApply() - new GoAPITagger(cpg, ruleCache, privadoInput, appCache).createAndApply() + new GoAPITagger(cpg, ruleCache, privadoInput, appCache, fileLinkingMetadata).createAndApply() } } diff --git a/src/main/scala/ai/privado/languageEngine/go/tagger/sink/GoAPITagger.scala b/src/main/scala/ai/privado/languageEngine/go/tagger/sink/GoAPITagger.scala index e93876894..e6f08d099 100644 --- a/src/main/scala/ai/privado/languageEngine/go/tagger/sink/GoAPITagger.scala +++ b/src/main/scala/ai/privado/languageEngine/go/tagger/sink/GoAPITagger.scala @@ -22,7 +22,7 @@ */ package ai.privado.languageEngine.go.tagger.sink -import ai.privado.cache.{AppCache, RuleCache} +import ai.privado.cache.{AppCache, FileLinkingMetadata, RuleCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.metric.MetricHandler import ai.privado.tagger.sink.APITagger @@ -30,8 +30,13 @@ import io.circe.Json import io.shiftleft.codepropertygraph.generated.Cpg import org.slf4j.LoggerFactory -class GoAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput, appCache: AppCache) - extends APITagger(cpg, ruleCache, privadoInput, appCache) { +class GoAPITagger( + cpg: Cpg, + ruleCache: RuleCache, + privadoInput: PrivadoInput, + appCache: AppCache, + fileLinkingMetadata: FileLinkingMetadata +) 
extends APITagger(cpg, ruleCache, privadoInput, appCache, fileLinkingMetadata) { private val logger = LoggerFactory.getLogger(this.getClass) MetricHandler.metricsData("apiTaggerVersion") = Json.fromString("Common HTTP Libraries Used") diff --git a/src/main/scala/ai/privado/languageEngine/java/processor/JavaProcessor.scala b/src/main/scala/ai/privado/languageEngine/java/processor/JavaProcessor.scala index 122f425b8..e5aba947a 100644 --- a/src/main/scala/ai/privado/languageEngine/java/processor/JavaProcessor.scala +++ b/src/main/scala/ai/privado/languageEngine/java/processor/JavaProcessor.scala @@ -72,7 +72,8 @@ class JavaProcessor( statsRecorder: StatsRecorder, returnClosedCpg: Boolean = true, databaseDetailsCache: DatabaseDetailsCache = new DatabaseDetailsCache(), - propertyFilterCache: PropertyFilterCache = new PropertyFilterCache() + propertyFilterCache: PropertyFilterCache = new PropertyFilterCache(), + fileLinkingMetadata: FileLinkingMetadata = new FileLinkingMetadata() ) extends BaseProcessor( ruleCache, privadoInput, @@ -85,7 +86,8 @@ class JavaProcessor( statsRecorder, returnClosedCpg, databaseDetailsCache, - propertyFilterCache + propertyFilterCache, + fileLinkingMetadata ) { override val logger: Logger = LoggerFactory.getLogger(getClass) @@ -117,7 +119,8 @@ class JavaProcessor( s3DatabaseDetailsCache, appCache, databaseDetailsCache, - statsRecorder + statsRecorder, + fileLinkingMetadata ) override def processCpg(): Either[String, CpgWithOutputMap] = { diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/PrivadoTagger.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/PrivadoTagger.scala index aa7395ce7..df48c5f9c 100644 --- a/src/main/scala/ai/privado/languageEngine/java/tagger/PrivadoTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/java/tagger/PrivadoTagger.scala @@ -68,7 +68,8 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { s3DatabaseDetailsCache: S3DatabaseDetailsCache, appCache: AppCache, databaseDetailsCache: DatabaseDetailsCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ): Traversal[Tag] = { logger.info("Starting tagging") @@ -88,7 +89,7 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { new JavaS3Tagger(cpg, s3DatabaseDetailsCache, databaseDetailsCache).createAndApply() - JavaAPISinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig, appCache, statsRecorder) + JavaAPISinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig, appCache, statsRecorder, fileLinkingMetadata) // Custom Rule tagging if (!privadoInputConfig.ignoreInternalRules) { @@ -99,7 +100,7 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { new MessagingConsumerReadPass(cpg, taggerCache, dataFlowCache, privadoInputConfig, appCache).createAndApply() } - FlinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig, appCache, statsRecorder) + FlinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig, appCache, statsRecorder, fileLinkingMetadata) new DatabaseQueryReadPass( cpg, diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkTagger.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkTagger.scala index 0d548fb00..0eb12bd27 100644 --- a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPISinkTagger.scala @@ -1,6 +1,6 @@ package ai.privado.languageEngine.java.tagger.sink.api -import ai.privado.cache.{AppCache, 
RuleCache} +import ai.privado.cache.{AppCache, FileLinkingMetadata, RuleCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.tagger.sink.api.{APISinkByMethodFullNameTagger, APISinkTagger} import ai.privado.utility.StatsRecorder @@ -17,10 +17,11 @@ object JavaAPISinkTagger extends APISinkTagger { ruleCache: RuleCache, privadoInput: PrivadoInput, appCache: AppCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ): Unit = { - super.applyTagger(cpg, ruleCache, privadoInput, appCache, statsRecorder) + super.applyTagger(cpg, ruleCache, privadoInput, appCache, statsRecorder, fileLinkingMetadata) new JavaAPIRetrofitTagger(cpg, ruleCache).createAndApply() if (privadoInput.enableAPIByParameter) { @@ -30,7 +31,7 @@ object JavaAPISinkTagger extends APISinkTagger { new APISinkByMethodFullNameTagger(cpg, ruleCache).createAndApply() - new JavaAPITagger(cpg, ruleCache, privadoInput, appCache, statsRecorder).createAndApply() + new JavaAPITagger(cpg, ruleCache, privadoInput, appCache, statsRecorder, fileLinkingMetadata).createAndApply() } } diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPITagger.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPITagger.scala index 4a9c40fbc..0ded4d2f8 100644 --- a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPITagger.scala +++ b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/api/JavaAPITagger.scala @@ -22,7 +22,7 @@ */ package ai.privado.languageEngine.java.tagger.sink.api -import ai.privado.cache.{AppCache, RuleCache} +import ai.privado.cache.{AppCache, FileLinkingMetadata, RuleCache} import ai.privado.entrypoint.{PrivadoInput, ScanProcessor} import ai.privado.languageEngine.java.language.* import ai.privado.languageEngine.java.semantic.JavaSemanticGenerator @@ -61,7 +61,8 @@ class JavaAPITagger( ruleCache: RuleCache, privadoInputConfig: PrivadoInput, appCache: AppCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ) extends PrivadoParallelCpgPass[RuleInfo](cpg) { private val logger = LoggerFactory.getLogger(this.getClass) implicit val engineContext: EngineContext = @@ -164,6 +165,7 @@ class JavaAPITagger( ruleInfo, ruleCache, privadoInputConfig, + fileLinkingMetadata, privadoInputConfig.enableAPIDisplay ) sinkTagger( @@ -173,7 +175,8 @@ class JavaAPITagger( builder, ruleInfo, ruleCache, - privadoInputConfig + privadoInputConfig, + fileLinkingMetadata ) case APITaggerVersionJava.V2Tagger => logger.debug("Using Enhanced API tagger to find API sinks") @@ -185,7 +188,8 @@ class JavaAPITagger( builder, ruleInfo, ruleCache, - privadoInputConfig + privadoInputConfig, + fileLinkingMetadata ) case _ => logger.debug("Skipping API Tagger because valid match not found, only applying Feign client") @@ -197,7 +201,8 @@ class JavaAPITagger( builder, ruleInfo, ruleCache, - privadoInputConfig + privadoInputConfig, + fileLinkingMetadata ) } } diff --git a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/framework/flink/FlinkTagger.scala b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/framework/flink/FlinkTagger.scala index 75d1b0000..345ce41d7 100644 --- a/src/main/scala/ai/privado/languageEngine/java/tagger/sink/framework/flink/FlinkTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/java/tagger/sink/framework/flink/FlinkTagger.scala @@ -1,6 +1,6 @@ package ai.privado.languageEngine.java.tagger.sink.framework.flink -import 
ai.privado.cache.{AppCache, RuleCache} +import ai.privado.cache.{AppCache, FileLinkingMetadata, RuleCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.model.{Constants, FilterProperty, Language, RuleInfo} import ai.privado.tagger.Tagger @@ -15,7 +15,8 @@ object FlinkTagger extends Tagger with TaggerHelper { ruleCache: RuleCache, privadoInput: PrivadoInput, appCache: AppCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ): Unit = { // Run flink only if detected if (cpg.imports.importedEntity("org.apache.flink.*").nonEmpty) { diff --git a/src/main/scala/ai/privado/languageEngine/javascript/metadata/FileImportMappingPassJS.scala b/src/main/scala/ai/privado/languageEngine/javascript/metadata/FileImportMappingPassJS.scala new file mode 100644 index 000000000..ea6684dbc --- /dev/null +++ b/src/main/scala/ai/privado/languageEngine/javascript/metadata/FileImportMappingPassJS.scala @@ -0,0 +1,66 @@ +package ai.privado.languageEngine.javascript.metadata + +import ai.privado.cache.FileLinkingMetadata +import io.joern.x2cpg.passes.frontend.XImportResolverPass +import io.shiftleft.codepropertygraph.generated.Cpg +import io.shiftleft.codepropertygraph.generated.nodes.Call + +import java.util.regex.{Matcher, Pattern} +import java.io.File as JFile +import scala.util.Try + +class FileImportMappingPassJS(cpg: Cpg, fileLinkingMetadata: FileLinkingMetadata) + extends XImportResolverPass(cpg: Cpg) { + + private val pathPattern = Pattern.compile("[\"']([\\w/.]+)[\"']") + + override protected def optionalResolveImport( + fileName: String, + importCall: Call, + importedEntity: String, + importedAs: String, + diffGraph: DiffGraphBuilder + ): Unit = { + val pathSep = ":" + val rawEntity = importedEntity.stripPrefix("./") + val alias = importedAs + val matcher = pathPattern.matcher(rawEntity) + val sep = Matcher.quoteReplacement(JFile.separator) + val root = s"$codeRootDir${JFile.separator}" + val currentFile = s"$root$fileName" + val extension = better.files.File(currentFile).`extension`.getOrElse(".ts") + // We want to know if the import is local since if an external name is used to match internal methods we may have + // false paths. + val isLocalImport = importedEntity.matches("^[.]+/?.*") + // TODO: At times there is an operation inside of a require, e.g. 
path.resolve(__dirname + "/../config/env/all.js")
+    //  this tries to recover the string but does not perform string constant propagation
+    val entity = if (matcher.find()) matcher.group(1) else rawEntity
+
+    val isImportingModule = !entity.contains(pathSep)
+    if (isLocalImport) {
+      val resolvedPath = Try(
+        better.files
+          .File(currentFile.stripSuffix(currentFile.split(sep).last), entity.split(pathSep).head)
+          .pathAsString
+          .stripPrefix(root)
+      ).getOrElse(entity)
+      fileLinkingMetadata.addToFileImportMap(fileName, s"$resolvedPath$extension")
+    } else {
+      val separatedFilePathList = fileName.split(sep).toList
+      val startingModule = entity.split(sep).head
+      val moduleIndex = separatedFilePathList.indexOf(startingModule)
+      if (moduleIndex != -1) {
+        Try {
+          val resolvedPath = better.files
+            .File(root, separatedFilePathList.take(moduleIndex).mkString(sep), entity.split(pathSep).head)
+            .pathAsString
+            .stripPrefix(root)
+          fileLinkingMetadata.addToFileImportMap(fileName, s"$resolvedPath$extension")
+        }
+      }
+
+    }
+
+  }
+
+}
diff --git a/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptBaseCPGProcessor.scala b/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptBaseCPGProcessor.scala
new file mode 100644
index 000000000..28b065ccd
--- /dev/null
+++ b/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptBaseCPGProcessor.scala
@@ -0,0 +1,117 @@
+package ai.privado.languageEngine.javascript.processor
+
+import ai.privado.cache.{
+  AppCache,
+  AuditCache,
+  DataFlowCache,
+  DatabaseDetailsCache,
+  FileLinkingMetadata,
+  PropertyFilterCache,
+  RuleCache,
+  S3DatabaseDetailsCache,
+  TaggerCache
+}
+import ai.privado.entrypoint.PrivadoInput
+import ai.privado.metric.MetricHandler
+import ai.privado.model.Constants.{cpgOutputFileName, outputDirectoryName}
+import ai.privado.model.CpgWithOutputMap
+import ai.privado.utility.StatsRecorder
+import io.circe.Json
+import io.joern.dataflowengineoss.language.Path
+import io.shiftleft.codepropertygraph.generated.Cpg
+import org.slf4j.{Logger, LoggerFactory}
+
+import scala.collection.mutable.ListBuffer
+import scala.util.{Failure, Success, Try}
+
+class JavascriptBaseCPGProcessor(
+  ruleCache: RuleCache,
+  privadoInput: PrivadoInput,
+  sourceRepoLocation: String,
+  dataFlowCache: DataFlowCache,
+  auditCache: AuditCache,
+  s3DatabaseDetailsCache: S3DatabaseDetailsCache,
+  appCache: AppCache,
+  statsRecorder: StatsRecorder,
+  returnClosedCpg: Boolean = true,
+  databaseDetailsCache: DatabaseDetailsCache = new DatabaseDetailsCache(),
+  propertyFilterCache: PropertyFilterCache = new PropertyFilterCache(),
+  fileLinkingMetadata: FileLinkingMetadata = new FileLinkingMetadata()
+) extends JavascriptProcessor(
+      ruleCache,
+      privadoInput,
+      sourceRepoLocation,
+      dataFlowCache,
+      auditCache,
+      s3DatabaseDetailsCache,
+      appCache,
+      statsRecorder,
+      returnClosedCpg,
+      databaseDetailsCache,
+      propertyFilterCache,
+      fileLinkingMetadata
+    ) {
+
+  override val logger: Logger = LoggerFactory.getLogger(this.getClass)
+
+  override def tagAndExport(xtocpg: Try[Cpg]): Either[String, CpgWithOutputMap] = {
+    xtocpg match {
+      case Success(cpg) =>
+        try {
+          statsRecorder.initiateNewStage("overridden overlay Processing")
+          applyOverridenPasses(cpg)
+          statsRecorder.endLastStage()
+
+          statsRecorder.initiateNewStage("Run oss data flow")
+          applyDataflowAndPostProcessingPasses(cpg)
+          statsRecorder.endLastStage()
+          statsRecorder.setSupressSubstagesFlag(false)
+          applyTaggingAndExport(cpg) match
+            case Left(err) =>
logger.debug(s"Errors captured in scanning : $err") + Left(err) + case Right(cpgWithOutputMap) => Right(cpgWithOutputMap) + } finally { + if returnClosedCpg then cpg.close() // To not close cpg, and use it further, pass the returnClosedCpg as false + import java.io.File + val cpgOutputPath = s"$sourceRepoLocation/$outputDirectoryName/$cpgOutputFileName" + val cpgFile = new File(cpgOutputPath) + statsRecorder.justLogMessage( + s"Binary file size -- ${cpgFile.length()} in Bytes - ${cpgFile.length() * 0.000001} MB\n\n\n" + ) + } + case Failure(exception) => + logger.error("Error while parsing the source code!") + logger.debug("Error : ", exception) + MetricHandler.setScanStatus(false) + Left("Error while parsing the source code: " + exception.toString) + } + } + + override def applyTaggingAndExport(cpg: Cpg): Either[String, CpgWithOutputMap] = { + statsRecorder.initiateNewStage("Brewing result") + val result = applyFinalExport(cpg, TaggerCache(), Map.empty, s3DatabaseDetailsCache, appCache) match { + case Left(err) => Left(err) + case Right(outputMap) => Right(CpgWithOutputMap(cpg, outputMap)) + } + statsRecorder.endLastStage() + result + } + + override protected def applyFinalExport( + cpg: Cpg, + taggerCache: TaggerCache, + dataflowMap: Map[String, Path], + s3DatabaseDetailsCache: S3DatabaseDetailsCache, + appCache: AppCache + ): Either[String, Map[String, Json]] = { + + val errorMsgs = ListBuffer[String]() + + fileLinkingReportExport(cpg) match + case Left(err) => + errorMsgs.addOne(err) + Left(errorMsgs.mkString("\n")) + case Right(_) => Right(Map.empty) + } +} diff --git a/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptProcessor.scala b/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptProcessor.scala index 2ba9de501..c39778317 100644 --- a/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptProcessor.scala +++ b/src/main/scala/ai/privado/languageEngine/javascript/processor/JavascriptProcessor.scala @@ -29,6 +29,7 @@ import ai.privado.dataflow.Dataflow import ai.privado.entrypoint.PrivadoInput import ai.privado.exporter.{ExcelExporter, JSONExporter} import ai.privado.languageEngine.base.processor.BaseProcessor +import ai.privado.languageEngine.javascript.metadata.FileImportMappingPassJS import ai.privado.languageEngine.javascript.passes.config.{JSPropertyLinkerPass, JsConfigPropertyPass} import ai.privado.languageEngine.javascript.semantic.Language.* import ai.privado.metric.MetricHandler @@ -65,7 +66,8 @@ class JavascriptProcessor( statsRecorder: StatsRecorder, returnClosedCpg: Boolean = true, databaseDetailsCache: DatabaseDetailsCache = new DatabaseDetailsCache(), - propertyFilterCache: PropertyFilterCache = new PropertyFilterCache() + propertyFilterCache: PropertyFilterCache = new PropertyFilterCache(), + fileLinkingMetadata: FileLinkingMetadata = new FileLinkingMetadata() ) extends BaseProcessor( ruleCache, privadoInput, @@ -78,7 +80,8 @@ class JavascriptProcessor( statsRecorder, returnClosedCpg, databaseDetailsCache, - propertyFilterCache + propertyFilterCache, + fileLinkingMetadata ) { override val logger: Logger = LoggerFactory.getLogger(this.getClass) @@ -108,11 +111,23 @@ class JavascriptProcessor( } override def runPrivadoTagger(cpg: Cpg, taggerCache: TaggerCache): Unit = - cpg.runTagger(ruleCache, taggerCache, privadoInput, dataFlowCache, appCache, databaseDetailsCache, statsRecorder) + cpg.runTagger( + ruleCache, + taggerCache, + privadoInput, + dataFlowCache, + appCache, + databaseDetailsCache, + 
statsRecorder, + fileLinkingMetadata + ) override def applyDataflowAndPostProcessingPasses(cpg: Cpg): Unit = { super.applyDataflowAndPostProcessingPasses(cpg) JsSrc2Cpg.postProcessingPasses(cpg).foreach(_.createAndApply()) + if (privadoInput.fileLinkingReport) { + new FileImportMappingPassJS(cpg, fileLinkingMetadata).createAndApply() + } } override def processCpg(): Either[String, CpgWithOutputMap] = { diff --git a/src/main/scala/ai/privado/languageEngine/javascript/tagger/PrivadoTagger.scala b/src/main/scala/ai/privado/languageEngine/javascript/tagger/PrivadoTagger.scala index 2e72c3c57..f2a6c5e4d 100644 --- a/src/main/scala/ai/privado/languageEngine/javascript/tagger/PrivadoTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/javascript/tagger/PrivadoTagger.scala @@ -23,7 +23,7 @@ package ai.privado.languageEngine.javascript.tagger -import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, RuleCache, TaggerCache} +import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, FileLinkingMetadata, RuleCache, TaggerCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.feeder.PermissionSourceRule import ai.privado.languageEngine.javascript.config.JSDBConfigTagger @@ -56,7 +56,8 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { dataFlowCache: DataFlowCache, appCache: AppCache, databaseDetailsCache: DatabaseDetailsCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ): Traversal[Tag] = { logger.info("Starting tagging") @@ -71,7 +72,7 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { new RegularSinkTagger(cpg, ruleCache, databaseDetailsCache).createAndApply() - JSAPISinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig, appCache, statsRecorder) + JSAPISinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig, appCache, statsRecorder, fileLinkingMetadata) new GraphqlQueryParserPass(cpg, ruleCache, taggerCache).createAndApply() diff --git a/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/JSAPISinkTagger.scala b/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/JSAPISinkTagger.scala index f0a5fa717..623ebd0fa 100644 --- a/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/JSAPISinkTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/JSAPISinkTagger.scala @@ -1,6 +1,6 @@ package ai.privado.languageEngine.javascript.tagger.sink -import ai.privado.cache.{AppCache, RuleCache} +import ai.privado.cache.{AppCache, FileLinkingMetadata, RuleCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.tagger.sink.api.{APISinkByMethodFullNameTagger, APISinkTagger} import ai.privado.utility.StatsRecorder @@ -13,14 +13,16 @@ object JSAPISinkTagger extends APISinkTagger { ruleCache: RuleCache, privadoInput: PrivadoInput, appCache: AppCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ): Unit = { - super.applyTagger(cpg, ruleCache, privadoInput, appCache, statsRecorder) + super.applyTagger(cpg, ruleCache, privadoInput, appCache, statsRecorder, fileLinkingMetadata) new APISinkByMethodFullNameTagger(cpg, ruleCache).createAndApply() - new JSAPITagger(cpg, ruleCache, privadoInput = privadoInput, appCache = appCache).createAndApply() + new JSAPITagger(cpg, ruleCache, privadoInput = privadoInput, appCache = appCache, fileLinkingMetadata) + .createAndApply() new GraphqlAPITagger(cpg, ruleCache).createAndApply() diff --git 
a/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/JSAPITagger.scala b/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/JSAPITagger.scala index 611d3b48a..347ffff8b 100644 --- a/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/JSAPITagger.scala +++ b/src/main/scala/ai/privado/languageEngine/javascript/tagger/sink/JSAPITagger.scala @@ -23,7 +23,7 @@ package ai.privado.languageEngine.javascript.tagger.sink -import ai.privado.cache.{AppCache, RuleCache} +import ai.privado.cache.{AppCache, FileLinkingMetadata, RuleCache} import ai.privado.dataflow.DuplicateFlowProcessor import ai.privado.entrypoint.{PrivadoInput, ScanProcessor} import ai.privado.model.{Constants, InternalTag, NodeType, RuleInfo} @@ -49,8 +49,13 @@ import ai.privado.utility.Utilities.{ import io.joern.dataflowengineoss.language.toExtendedCfgNode import overflowdb.BatchedUpdate -class JSAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput, appCache: AppCache) - extends APITagger(cpg, ruleCache, privadoInput, appCache) { +class JSAPITagger( + cpg: Cpg, + ruleCache: RuleCache, + privadoInput: PrivadoInput, + appCache: AppCache, + fileLinkingMetadata: FileLinkingMetadata +) extends APITagger(cpg, ruleCache, privadoInput, appCache, fileLinkingMetadata) { override val apis = cacheCall .name(APISINKS_REGEX) @@ -191,6 +196,12 @@ class JSAPITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput, ap val domains = ListBuffer[String]() if (apis.nonEmpty && apiInternalSources.nonEmpty) { val apiFlows = apis.reachableByFlows(apiInternalSources)(engineContext).toList + + if (privadoInput.fileLinkingReport) { + val dataflowFiles = apiFlows.map(_.elements.flatMap(_.file.name).dedup.l).l + fileLinkingMetadata.addToDataflowMap(dataflowFiles) + } + apiFlows.foreach(flow => { val literalCode = flow.elements.head.originalPropertyValue.getOrElse(flow.elements.head.code.split(" ").last) val apiNode = flow.elements.last diff --git a/src/main/scala/ai/privado/languageEngine/kotlin/processor/KotlinProcessor.scala b/src/main/scala/ai/privado/languageEngine/kotlin/processor/KotlinProcessor.scala index 6fce5f948..cbff415bb 100644 --- a/src/main/scala/ai/privado/languageEngine/kotlin/processor/KotlinProcessor.scala +++ b/src/main/scala/ai/privado/languageEngine/kotlin/processor/KotlinProcessor.scala @@ -6,6 +6,7 @@ import ai.privado.cache.{ AuditCache, DataFlowCache, DatabaseDetailsCache, + FileLinkingMetadata, PropertyFilterCache, RuleCache, S3DatabaseDetailsCache, @@ -55,7 +56,8 @@ class KotlinProcessor( statsRecorder: StatsRecorder, returnClosedCpg: Boolean = true, databaseDetailsCache: DatabaseDetailsCache = new DatabaseDetailsCache(), - propertyFilterCache: PropertyFilterCache = PropertyFilterCache() + propertyFilterCache: PropertyFilterCache = PropertyFilterCache(), + fileLinkingMetadata: FileLinkingMetadata = new FileLinkingMetadata() ) extends BaseProcessor( ruleCache, privadoInput, @@ -68,7 +70,8 @@ class KotlinProcessor( statsRecorder, returnClosedCpg, databaseDetailsCache, - propertyFilterCache + propertyFilterCache, + fileLinkingMetadata ) { override val logger = LoggerFactory.getLogger(getClass) private var cpgconfig = Config() @@ -104,7 +107,8 @@ class KotlinProcessor( dataFlowCache, appCache, databaseDetailsCache, - statsRecorder + statsRecorder, + fileLinkingMetadata ) override def processCpg(): Either[String, CpgWithOutputMap] = { diff --git a/src/main/scala/ai/privado/languageEngine/kotlin/tagger/PrivadoTagger.scala 
b/src/main/scala/ai/privado/languageEngine/kotlin/tagger/PrivadoTagger.scala index d71db4a87..76eaf4607 100644 --- a/src/main/scala/ai/privado/languageEngine/kotlin/tagger/PrivadoTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/kotlin/tagger/PrivadoTagger.scala @@ -1,6 +1,6 @@ package ai.privado.languageEngine.kotlin.tagger -import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, RuleCache, TaggerCache} +import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, FileLinkingMetadata, RuleCache, TaggerCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.feeder.PermissionSourceRule import ai.privado.languageEngine.java.feeder.StorageInheritRule @@ -34,7 +34,8 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { dataflowCache: DataFlowCache, appCache: AppCache, databaseDetailsCache: DatabaseDetailsCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ): Traversal[Tag] = { logger.info("Starting tagging") @@ -64,9 +65,9 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { new StorageAnnotationTagger(cpg, ruleCache).createAndApply() } - JavaAPISinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig, appCache, statsRecorder) + JavaAPISinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig, appCache, statsRecorder, fileLinkingMetadata) - FlinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig, appCache, statsRecorder) + FlinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig, appCache, statsRecorder, fileLinkingMetadata) new AndroidCollectionTagger( cpg, diff --git a/src/main/scala/ai/privado/languageEngine/php/processor/PhpProcessor.scala b/src/main/scala/ai/privado/languageEngine/php/processor/PhpProcessor.scala index 76d8663d7..35b27be06 100644 --- a/src/main/scala/ai/privado/languageEngine/php/processor/PhpProcessor.scala +++ b/src/main/scala/ai/privado/languageEngine/php/processor/PhpProcessor.scala @@ -57,7 +57,8 @@ class PhpProcessor( statsRecorder: StatsRecorder, returnClosedCpg: Boolean = true, databaseDetailsCache: DatabaseDetailsCache = new DatabaseDetailsCache(), - propertyFilterCache: PropertyFilterCache = new PropertyFilterCache() + propertyFilterCache: PropertyFilterCache = new PropertyFilterCache(), + fileLinkingMetadata: FileLinkingMetadata = new FileLinkingMetadata() ) extends BaseProcessor( ruleCache, privadoInput, @@ -70,7 +71,8 @@ class PhpProcessor( statsRecorder, returnClosedCpg, databaseDetailsCache, - propertyFilterCache + propertyFilterCache, + fileLinkingMetadata ) { override val logger: Logger = LoggerFactory.getLogger(this.getClass) @@ -78,7 +80,16 @@ class PhpProcessor( override def applyPrivadoPasses(cpg: Cpg): List[CpgPassBase] = List[CpgPassBase]() override def runPrivadoTagger(cpg: Cpg, taggerCache: TaggerCache): Unit = - cpg.runTagger(ruleCache, taggerCache, privadoInput, dataFlowCache, appCache, databaseDetailsCache, statsRecorder) + cpg.runTagger( + ruleCache, + taggerCache, + privadoInput, + dataFlowCache, + appCache, + databaseDetailsCache, + statsRecorder, + fileLinkingMetadata + ) override def applyDataflowAndPostProcessingPasses(cpg: Cpg): Unit = { super.applyDataflowAndPostProcessingPasses(cpg) diff --git a/src/main/scala/ai/privado/languageEngine/php/tagger/PrivadoTagger.scala b/src/main/scala/ai/privado/languageEngine/php/tagger/PrivadoTagger.scala index c4d46a287..bedc167bb 100644 --- a/src/main/scala/ai/privado/languageEngine/php/tagger/PrivadoTagger.scala +++ 
b/src/main/scala/ai/privado/languageEngine/php/tagger/PrivadoTagger.scala @@ -23,7 +23,7 @@ package ai.privado.languageEngine.php.tagger -import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, RuleCache, TaggerCache} +import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, FileLinkingMetadata, RuleCache, TaggerCache} import ai.privado.entrypoint.PrivadoInput import ai.privado.languageEngine.php.tagger.collection.MethodFullNameCollectionTagger import ai.privado.languageEngine.php.tagger.collection.AnnotationsCollectionTagger @@ -50,7 +50,8 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { dataFlowCache: DataFlowCache, appCache: AppCache, databaseDetailsCache: DatabaseDetailsCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ): Traversal[Tag] = { logger.info("Beginning tagging") @@ -62,7 +63,7 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { new ConfigCollectionTagger(cpg, rules, privadoInputConfig.sourceLocation.headOption.getOrElse("")).createAndApply() new MethodFullNameCollectionTagger(cpg, rules).createAndApply() - PhpAPISinkTagger.applyTagger(cpg, rules, privadoInputConfig, appCache, statsRecorder) + PhpAPISinkTagger.applyTagger(cpg, rules, privadoInputConfig, appCache, statsRecorder, fileLinkingMetadata) logger.info("Finished tagging") cpg.tag diff --git a/src/main/scala/ai/privado/languageEngine/php/tagger/sink/APITagger.scala b/src/main/scala/ai/privado/languageEngine/php/tagger/sink/APITagger.scala index 9750b50a5..5ba44a061 100644 --- a/src/main/scala/ai/privado/languageEngine/php/tagger/sink/APITagger.scala +++ b/src/main/scala/ai/privado/languageEngine/php/tagger/sink/APITagger.scala @@ -1,6 +1,6 @@ package ai.privado.languageEngine.php.tagger.sink -import ai.privado.cache.{AppCache, RuleCache} +import ai.privado.cache.{AppCache, FileLinkingMetadata, RuleCache} import ai.privado.entrypoint.{PrivadoInput, ScanProcessor} import ai.privado.languageEngine.java.language.{NodeStarters, StepsForProperty} import ai.privado.metric.MetricHandler @@ -17,8 +17,13 @@ import org.slf4j.LoggerFactory import scala.jdk.CollectionConverters.CollectionHasAsScala -class APITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput, appCache: AppCache) - extends PrivadoParallelCpgPass[RuleInfo](cpg) { +class APITagger( + cpg: Cpg, + ruleCache: RuleCache, + privadoInput: PrivadoInput, + appCache: AppCache, + fileLinkingMetadata: FileLinkingMetadata +) extends PrivadoParallelCpgPass[RuleInfo](cpg) { private val logger = LoggerFactory.getLogger(this.getClass) val cacheCall: List[Call] = cpg.call.where(_.nameNot(Operators.ALL.asScala.toSeq: _*)).l val constructNameCall: List[Call] = cacheCall.where(_.name("__construct")).l @@ -65,7 +70,8 @@ class APITagger(cpg: Cpg, ruleCache: RuleCache, privadoInput: PrivadoInput, appC builder, ruleInfo, ruleCache, - privadoInput + privadoInput, + fileLinkingMetadata ) } } diff --git a/src/main/scala/ai/privado/languageEngine/php/tagger/sink/PhpAPISinkTagger.scala b/src/main/scala/ai/privado/languageEngine/php/tagger/sink/PhpAPISinkTagger.scala index 1808d0700..f4c33c31a 100644 --- a/src/main/scala/ai/privado/languageEngine/php/tagger/sink/PhpAPISinkTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/php/tagger/sink/PhpAPISinkTagger.scala @@ -1,6 +1,6 @@ package ai.privado.languageEngine.php.tagger.sink -import ai.privado.cache.{AppCache, RuleCache} +import ai.privado.cache.{AppCache, FileLinkingMetadata, RuleCache} import 
ai.privado.entrypoint.PrivadoInput import ai.privado.tagger.sink.api.{APISinkByMethodFullNameTagger, APISinkTagger} import ai.privado.utility.StatsRecorder @@ -13,13 +13,15 @@ object PhpAPISinkTagger extends APISinkTagger { ruleCache: RuleCache, privadoInput: PrivadoInput, appCache: AppCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ): Unit = { - super.applyTagger(cpg, ruleCache, privadoInput, appCache, statsRecorder) + super.applyTagger(cpg, ruleCache, privadoInput, appCache, statsRecorder, fileLinkingMetadata) new APISinkByMethodFullNameTagger(cpg, ruleCache).createAndApply() - new APITagger(cpg, ruleCache, privadoInput = privadoInput, appCache = appCache).createAndApply() + new APITagger(cpg, ruleCache, privadoInput = privadoInput, appCache = appCache, fileLinkingMetadata) + .createAndApply() } } diff --git a/src/main/scala/ai/privado/languageEngine/python/metadata/FileLinkingMetadataPassPython.scala b/src/main/scala/ai/privado/languageEngine/python/metadata/FileLinkingMetadataPassPython.scala new file mode 100644 index 000000000..6eacbe904 --- /dev/null +++ b/src/main/scala/ai/privado/languageEngine/python/metadata/FileLinkingMetadataPassPython.scala @@ -0,0 +1,105 @@ +package ai.privado.languageEngine.python.metadata + +import better.files.File +import ai.privado.cache.FileLinkingMetadata +import io.joern.x2cpg.passes.frontend.XImportResolverPass +import io.shiftleft.codepropertygraph.generated.Cpg +import io.shiftleft.codepropertygraph.generated.nodes.{Call, Member, Method, TypeDecl} +import io.shiftleft.semanticcpg.language.* + +import java.util.regex.Matcher +import java.io.File as JFile +import scala.collection.mutable + +class FileLinkingMetadataPassPython(cpg: Cpg, fileLinkingMetadata: FileLinkingMetadata) + extends XImportResolverPass(cpg) { + + private val moduleCache: mutable.HashMap[String, ImportableEntity] = mutable.HashMap.empty + + override def init(): Unit = { + cpg.typeDecl.isExternal(false).nameExact("").foreach { moduleType => + val modulePath = fileToPythonImportNotation(moduleType.filename) + cpg.method.fullNameExact(moduleType.fullName).headOption.foreach { moduleMethod => + moduleCache.put(modulePath, Module(moduleType, moduleMethod)) + moduleMethod.astChildren.foreach { + case moduleFunction: Method => + moduleCache.put(s"$modulePath.${moduleFunction.name}", ImportableFunction(moduleFunction)) + // Ignore types for functions that are used for method pointers + case moduleType: TypeDecl if moduleMethod.astChildren.isMethod.fullNameExact(moduleType.fullName).isEmpty => + moduleCache.put(s"$modulePath.${moduleType.name}", ImportableType(moduleType)) + case _ => // do nothing + } + } + moduleType.member.foreach { moduleMember => + moduleCache + .getOrElseUpdate(s"$modulePath.${moduleMember.name}", ModuleVariable(moduleType.fullName, moduleMember)) + } + } + } + + private def fileToPythonImportNotation(filename: String): String = + filename + .stripPrefix(codeRootDir) + .replaceAll(Matcher.quoteReplacement(JFile.separator), ".") + .stripSuffix(".py") + .stripSuffix(".__init__") + + override protected def optionalResolveImport( + fileName: String, + importCall: Call, + importedEntity: String, + importedAs: String, + diffGraph: DiffGraphBuilder + ): Unit = { + val currDir = File(codeRootDir) / fileName match + case x if x.isDirectory => x + case x => x.parent + + val importedEntityAsFullyQualifiedImport = + // If the path/entity uses Python's `from .import x` syntax, we will need to remove these + 
fileToPythonImportNotation(importedEntity.replaceFirst("^\\.+", ""))
+    val importedEntityAsRelativeImport = Seq(
+      fileToPythonImportNotation(currDir.pathAsString.stripPrefix(codeRootDir).stripPrefix(JFile.separator)),
+      importedEntityAsFullyQualifiedImport
+    ).filterNot(_.isBlank).mkString(".")
+
+    // We evaluate both variations, based on what we can expect from different versions of Python and how the package
+    // layout is interpreted by the presence or lack of `__init__.py` files. Additionally, external packages are always
+    // fully qualified.
+    // Record a file-import mapping for whichever candidate resolves in the module cache.
+    Seq(
+      moduleCache.get(importedEntityAsRelativeImport),
+      moduleCache.get(importedEntityAsFullyQualifiedImport)
+    ).flatten.foreach(_.toResolvedImport(fileName))
+  }
+
+  private sealed trait ImportableEntity {
+
+    def toResolvedImport(fileName: String): Unit
+
+  }
+
+  private case class Module(moduleType: TypeDecl, moduleMethod: Method) extends ImportableEntity {
+    override def toResolvedImport(fileName: String): Unit = {
+      fileLinkingMetadata.addToFileImportMap(fileName, moduleType.filename)
+      fileLinkingMetadata.addToFileImportMap(fileName, moduleMethod.filename)
+    }
+
+  }
+
+  private case class ModuleVariable(baseTypeFullName: String, member: Member) extends ImportableEntity {
+
+    override def toResolvedImport(fileName: String): Unit =
+      fileLinkingMetadata.addToFileImportMap(fileName, member.file.name.head)
+  }
+
+  private case class ImportableFunction(function: Method) extends ImportableEntity {
+    override def toResolvedImport(fileName: String): Unit =
+      fileLinkingMetadata.addToFileImportMap(fileName, function.filename)
+  }
+
+  private case class ImportableType(typ: TypeDecl) extends ImportableEntity {
+    override def toResolvedImport(fileName: String): Unit =
+      fileLinkingMetadata.addToFileImportMap(fileName, typ.filename)
+  }
+}
diff --git a/src/main/scala/ai/privado/languageEngine/python/processor/PythonBaseCPGProcessor.scala b/src/main/scala/ai/privado/languageEngine/python/processor/PythonBaseCPGProcessor.scala
new file mode 100644
index 000000000..10a68102d
--- /dev/null
+++ b/src/main/scala/ai/privado/languageEngine/python/processor/PythonBaseCPGProcessor.scala
@@ -0,0 +1,118 @@
+package ai.privado.languageEngine.python.processor
+
+import ai.privado.cache.{
+  AppCache,
+  AuditCache,
+  DataFlowCache,
+  DatabaseDetailsCache,
+  FileLinkingMetadata,
+  PropertyFilterCache,
+  RuleCache,
+  S3DatabaseDetailsCache,
+  TaggerCache
+}
+import ai.privado.entrypoint.PrivadoInput
+import ai.privado.metric.MetricHandler
+import ai.privado.model.Constants.{cpgOutputFileName, outputDirectoryName}
+import ai.privado.model.CpgWithOutputMap
+import ai.privado.utility.StatsRecorder
+import io.circe.Json
+import io.joern.dataflowengineoss.language.Path
+import io.shiftleft.codepropertygraph.generated.Cpg
+import org.slf4j.{Logger, LoggerFactory}
+
+import scala.collection.mutable.ListBuffer
+import scala.util.{Failure, Success, Try}
+
+class PythonBaseCPGProcessor(
+  ruleCache: RuleCache,
+  privadoInput: PrivadoInput,
+  sourceRepoLocation: String,
+  dataFlowCache: DataFlowCache,
+  auditCache: AuditCache,
+  s3DatabaseDetailsCache: S3DatabaseDetailsCache,
+  appCache: AppCache,
+  statsRecorder: StatsRecorder,
+  returnClosedCpg: Boolean = true,
+  databaseDetailsCache: DatabaseDetailsCache = new DatabaseDetailsCache(),
+  propertyFilterCache: PropertyFilterCache = new PropertyFilterCache(),
+  fileLinkingMetadata: FileLinkingMetadata = new FileLinkingMetadata()
+) extends PythonProcessor(
+      ruleCache,
+      privadoInput,
+      sourceRepoLocation,
+      dataFlowCache,
+      auditCache,
+      s3DatabaseDetailsCache,
+      appCache,
+      statsRecorder,
+      returnClosedCpg,
+      databaseDetailsCache,
+      propertyFilterCache,
+      fileLinkingMetadata
+    ) {
+
+  override val logger: Logger = LoggerFactory.getLogger(this.getClass)
+
+  override def tagAndExport(xtocpg: Try[Cpg]): Either[String, CpgWithOutputMap] = {
+    xtocpg match {
+      case Success(cpg) =>
+        try {
+          statsRecorder.initiateNewStage("overridden overlay Processing")
+          applyOverridenPasses(cpg)
+          statsRecorder.endLastStage()
+
+          statsRecorder.initiateNewStage("Run oss data flow")
+          applyDataflowAndPostProcessingPasses(cpg)
+          statsRecorder.endLastStage()
+          statsRecorder.setSupressSubstagesFlag(false)
+          applyTaggingAndExport(cpg) match
+            case Left(err) =>
+              logger.debug(s"Errors captured in scanning : $err")
+              Left(err)
+            case Right(cpgWithOutputMap) => Right(cpgWithOutputMap)
+        } finally {
+          if returnClosedCpg then cpg.close() // To not close cpg, and use it further, pass the returnClosedCpg as false
+          import java.io.File
+          val cpgOutputPath = s"$sourceRepoLocation/$outputDirectoryName/$cpgOutputFileName"
+          val cpgFile = new File(cpgOutputPath)
+          statsRecorder.justLogMessage(
+            s"Binary file size -- ${cpgFile.length()} in Bytes - ${cpgFile.length() * 0.000001} MB\n\n\n"
+          )
+        }
+      case Failure(exception) =>
+        logger.error("Error while parsing the source code!")
+        logger.debug("Error : ", exception)
+        MetricHandler.setScanStatus(false)
+        Left("Error while parsing the source code: " + exception.toString)
+    }
+  }
+
+  override def applyTaggingAndExport(cpg: Cpg): Either[String, CpgWithOutputMap] = {
+    statsRecorder.initiateNewStage("Brewing result")
+    val result = applyFinalExport(cpg, TaggerCache(), Map.empty, s3DatabaseDetailsCache, appCache) match {
+      case Left(err) => Left(err)
+      case Right(outputMap) => Right(CpgWithOutputMap(cpg, outputMap))
+    }
+    statsRecorder.endLastStage()
+    result
+  }
+
+  override protected def applyFinalExport(
+    cpg: Cpg,
+    taggerCache: TaggerCache,
+    dataflowMap: Map[String, Path],
+    s3DatabaseDetailsCache: S3DatabaseDetailsCache,
+    appCache: AppCache
+  ): Either[String, Map[String, Json]] = {
+
+    val errorMsgs = ListBuffer[String]()
+
+    fileLinkingReportExport(cpg) match
+      case Left(err) =>
+        errorMsgs.addOne(err)
+        Left(errorMsgs.mkString("\n"))
+      case Right(_) => Right(Map.empty)
+  }
+
+}
diff --git a/src/main/scala/ai/privado/languageEngine/python/processor/PythonProcessor.scala b/src/main/scala/ai/privado/languageEngine/python/processor/PythonProcessor.scala
index 7bac48e60..37317cfe9 100644
--- a/src/main/scala/ai/privado/languageEngine/python/processor/PythonProcessor.scala
+++ b/src/main/scala/ai/privado/languageEngine/python/processor/PythonProcessor.scala
@@ -4,12 +4,13 @@ import ai.privado.entrypoint.PrivadoInput
 import ai.privado.cache.*
 import ai.privado.languageEngine.base.processor.BaseProcessor
 import ai.privado.languageEngine.python.config.PythonConfigPropertyPass
+import ai.privado.languageEngine.python.metadata.FileLinkingMetadataPassPython
 import ai.privado.languageEngine.python.passes.PrivadoPythonTypeHintCallLinker
 import ai.privado.languageEngine.python.passes.config.PythonPropertyLinkerPass
 import ai.privado.languageEngine.python.semantic.Language.*
 import ai.privado.languageEngine.python.tagger.PythonS3Tagger
 import ai.privado.model.Constants.*
-import ai.privado.model.{CpgWithOutputMap, Constants, Language}
+import ai.privado.model.{Constants, CpgWithOutputMap, Language}
 import ai.privado.passes.*
 import ai.privado.semantic.Language.*
import ai.privado.utility.Utilities.createCpgFolder @@ -38,7 +39,8 @@ class PythonProcessor( statsRecorder: StatsRecorder, returnClosedCpg: Boolean = true, databaseDetailsCache: DatabaseDetailsCache = new DatabaseDetailsCache(), - propertyFilterCache: PropertyFilterCache = new PropertyFilterCache() + propertyFilterCache: PropertyFilterCache = new PropertyFilterCache(), + fileLinkingMetadata: FileLinkingMetadata = new FileLinkingMetadata() ) extends BaseProcessor( ruleCache, privadoInput, @@ -51,7 +53,8 @@ class PythonProcessor( statsRecorder, returnClosedCpg, databaseDetailsCache, - propertyFilterCache + propertyFilterCache, + fileLinkingMetadata ) { override val logger = LoggerFactory.getLogger(getClass) @@ -82,7 +85,8 @@ class PythonProcessor( s3DatabaseDetailsCache, appCache, databaseDetailsCache, - statsRecorder + statsRecorder, + fileLinkingMetadata ) } @@ -96,6 +100,10 @@ class PythonProcessor( new PrivadoPythonTypeHintCallLinker(cpg).createAndApply() new NaiveCallLinker(cpg).createAndApply() new AstLinkerPass(cpg).createAndApply() + + if (privadoInput.fileLinkingReport) { + new FileLinkingMetadataPassPython(cpg, fileLinkingMetadata).createAndApply() + } } override def processCpg(): Either[String, CpgWithOutputMap] = { diff --git a/src/main/scala/ai/privado/languageEngine/python/tagger/PrivadoTagger.scala b/src/main/scala/ai/privado/languageEngine/python/tagger/PrivadoTagger.scala index 8fa25a0d3..7cd8fcc1d 100644 --- a/src/main/scala/ai/privado/languageEngine/python/tagger/PrivadoTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/python/tagger/PrivadoTagger.scala @@ -1,6 +1,14 @@ package ai.privado.languageEngine.python.tagger -import ai.privado.cache.{AppCache, DataFlowCache, DatabaseDetailsCache, RuleCache, S3DatabaseDetailsCache, TaggerCache} +import ai.privado.cache.{ + AppCache, + DataFlowCache, + DatabaseDetailsCache, + FileLinkingMetadata, + RuleCache, + S3DatabaseDetailsCache, + TaggerCache +} import ai.privado.entrypoint.PrivadoInput import ai.privado.languageEngine.python.config.PythonDBConfigTagger import ai.privado.languageEngine.python.feeder.StorageInheritRule @@ -35,7 +43,8 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { s3DatabaseDetailsCache: S3DatabaseDetailsCache, appCache: AppCache, databaseDetailsCache: DatabaseDetailsCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ): Traversal[Tag] = { logger.info("Starting tagging") @@ -47,7 +56,7 @@ class PrivadoTagger(cpg: Cpg) extends PrivadoBaseTagger { new SqlQueryTagger(cpg, ruleCache).createAndApply() - PythonAPISinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig, appCache, statsRecorder) + PythonAPISinkTagger.applyTagger(cpg, ruleCache, privadoInputConfig, appCache, statsRecorder, fileLinkingMetadata) new PythonDBConfigTagger(cpg, databaseDetailsCache).createAndApply() diff --git a/src/main/scala/ai/privado/languageEngine/python/tagger/sink/PythonAPISinkTagger.scala b/src/main/scala/ai/privado/languageEngine/python/tagger/sink/PythonAPISinkTagger.scala index 913bf2990..e240ce266 100644 --- a/src/main/scala/ai/privado/languageEngine/python/tagger/sink/PythonAPISinkTagger.scala +++ b/src/main/scala/ai/privado/languageEngine/python/tagger/sink/PythonAPISinkTagger.scala @@ -1,6 +1,6 @@ package ai.privado.languageEngine.python.tagger.sink -import ai.privado.cache.{AppCache, RuleCache} +import ai.privado.cache.{AppCache, FileLinkingMetadata, RuleCache} import ai.privado.entrypoint.PrivadoInput import 
ai.privado.tagger.sink.api.{APISinkByMethodFullNameTagger, APISinkTagger} import ai.privado.utility.StatsRecorder @@ -13,13 +13,15 @@ object PythonAPISinkTagger extends APISinkTagger { ruleCache: RuleCache, privadoInput: PrivadoInput, appCache: AppCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ): Unit = { - super.applyTagger(cpg, ruleCache, privadoInput, appCache, statsRecorder) + super.applyTagger(cpg, ruleCache, privadoInput, appCache, statsRecorder, fileLinkingMetadata) new APISinkByMethodFullNameTagger(cpg, ruleCache).createAndApply() - new PythonAPITagger(cpg, ruleCache, privadoInput = privadoInput, appCache, statsRecorder).createAndApply() + new PythonAPITagger(cpg, ruleCache, privadoInput = privadoInput, appCache, statsRecorder, fileLinkingMetadata) + .createAndApply() } } diff --git a/src/main/scala/ai/privado/languageEngine/python/tagger/sink/PythonAPITagger.scala b/src/main/scala/ai/privado/languageEngine/python/tagger/sink/PythonAPITagger.scala index dc4f6c9a5..27c02809c 100644 --- a/src/main/scala/ai/privado/languageEngine/python/tagger/sink/PythonAPITagger.scala +++ b/src/main/scala/ai/privado/languageEngine/python/tagger/sink/PythonAPITagger.scala @@ -22,7 +22,7 @@ */ package ai.privado.languageEngine.python.tagger.sink -import ai.privado.cache.{AppCache, RuleCache} +import ai.privado.cache.{AppCache, FileLinkingMetadata, RuleCache} import ai.privado.entrypoint.{PrivadoInput, ScanProcessor} import ai.privado.languageEngine.java.language.{NodeStarters, StepsForProperty} import ai.privado.languageEngine.java.semantic.JavaSemanticGenerator @@ -44,7 +44,8 @@ class PythonAPITagger( ruleCache: RuleCache, privadoInput: PrivadoInput, appCache: AppCache, - statsRecorder: StatsRecorder + statsRecorder: StatsRecorder, + fileLinkingMetadata: FileLinkingMetadata ) extends PrivadoParallelCpgPass[RuleInfo](cpg) { private val logger = LoggerFactory.getLogger(this.getClass) val cacheCall = cpg.call.where(_.nameNot("(