From 56e616f5a0e616dfce0e97d86bcec9527144fd56 Mon Sep 17 00:00:00 2001 From: prabhu Date: Sat, 9 Sep 2023 02:21:11 +0100 Subject: [PATCH] Adds a new frontend to parse only header files in c/c++ (#74) Signed-off-by: Prabhu Subramanian --- README.md | 3 +- build.sbt | 4 +- log4j2.xml | 4 +- src/main/scala/io/appthreat/atom/Atom.scala | 14 ++- .../atom/frontends/AstCreationPass.scala | 54 +++++++++++ .../io/appthreat/atom/frontends/C2Atom.scala | 22 +++++ .../appthreat/atom/frontends/CdtParser.scala | 94 +++++++++++++++++++ wrapper/nodejs/package-lock.json | 4 +- wrapper/nodejs/package.json | 2 +- 9 files changed, 192 insertions(+), 9 deletions(-) create mode 100644 src/main/scala/io/appthreat/atom/frontends/AstCreationPass.scala create mode 100644 src/main/scala/io/appthreat/atom/frontends/C2Atom.scala create mode 100644 src/main/scala/io/appthreat/atom/frontends/CdtParser.scala diff --git a/README.md b/README.md index f047f56..5059c20 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Atom (⚛) -Atom is a novel intermediate representation for applications and a standalone tool powered by the [joern](https://joern.io) library. The intermediate representation is optimized for operations typically used for application analytics and machine learning, including [slicing](./specification/docs/slices.md) and [vectoring](./specification/docs/vectors.md). +Atom is a novel intermediate representation for applications and a standalone tool powered by the [joern](https://joern.io) library. The intermediate representation is optimized for operations typically used for application analytics and machine learning, including [slicing](./specification/docs/slices.md) and [vectoring](./specification/docs/vectors.md). Our vision is to make atom useful for a number of use cases such as: @@ -94,6 +94,7 @@ Learn more about [slices](./specification/docs/slices.md) or view some [samples] ## Languages supported - C/C++ (Requires Java 17 or above) +- H (C/C++ Header files alone) - Java (Requires compilation) - Jar - Android APK (Requires Android SDK. Set the environment variable `ANDROID_HOME`) diff --git a/build.sbt b/build.sbt index 703a43f..998d6b3 100644 --- a/build.sbt +++ b/build.sbt @@ -3,11 +3,11 @@ ThisBuild / organization := "io.appthreat" ThisBuild / version := "1.0.0" ThisBuild / scalaVersion := "3.3.0" -val joernVersion = "2.0.74" +val joernVersion = "2.0.81" lazy val atom = Projects.atom -val astGenVersion = "3.4.0" +val astGenVersion = "3.5.0" libraryDependencies ++= Seq( "com.github.pathikrit" %% "better-files" % "3.9.2", diff --git a/log4j2.xml b/log4j2.xml index 4752de6..cbf1fc6 100644 --- a/log4j2.xml +++ b/log4j2.xml @@ -1,12 +1,12 @@ - + - + diff --git a/src/main/scala/io/appthreat/atom/Atom.scala b/src/main/scala/io/appthreat/atom/Atom.scala index 1dd3f09..4accf7b 100644 --- a/src/main/scala/io/appthreat/atom/Atom.scala +++ b/src/main/scala/io/appthreat/atom/Atom.scala @@ -6,6 +6,7 @@ import io.appthreat.atom.dataflows.{DataFlowGraph, OssDataFlow, OssDataFlowOptio import io.appthreat.atom.parsedeps.{AtomSlice, parseDependencies} import io.appthreat.atom.passes.{SafeJSTypeRecoveryPass, TypeHintPass} import io.appthreat.atom.slicing.* +import io.appthreat.atom.frontends.C2Atom import io.joern.c2cpg.{C2Cpg, Config as CConfig} import io.joern.javasrc2cpg.{JavaSrc2Cpg, Config as JavaConfig} import io.joern.jimple2cpg.{Jimple2Cpg, Config as JimpleConfig} @@ -300,10 +301,21 @@ object Atom { case _ => DEFAULT_ATOM_OUT_FILE (language match { + case "H" | "HPP" => + new C2Atom() + .createCpg( + CConfig(includeComments = false, logProblems = false, includePathsAutoDiscovery = false) + .withLogPreprocessor(false) + .withIncludePaths(C2CPG_INCLUDE_PATHS.toSet) + .withInputPath(config.inputPath.pathAsString) + .withOutputPath(outputAtomFile) + .withIgnoredFilesRegex(".*(test|docs|examples|samples|mocks).*") + ) case Languages.C | Languages.NEWC | "CPP" | "C++" => new C2Cpg() .createCpgWithOverlays( - CConfig(includeComments = false, logProblems = false, includePathsAutoDiscovery = true) + CConfig(includeComments = false, logProblems = false, includePathsAutoDiscovery = false) + .withLogPreprocessor(false) .withIncludePaths(C2CPG_INCLUDE_PATHS.toSet) .withInputPath(config.inputPath.pathAsString) .withOutputPath(outputAtomFile) diff --git a/src/main/scala/io/appthreat/atom/frontends/AstCreationPass.scala b/src/main/scala/io/appthreat/atom/frontends/AstCreationPass.scala new file mode 100644 index 0000000..c888961 --- /dev/null +++ b/src/main/scala/io/appthreat/atom/frontends/AstCreationPass.scala @@ -0,0 +1,54 @@ +package io.appthreat.atom.frontends + +import io.joern.c2cpg.Config +import io.joern.c2cpg.astcreation.AstCreator +import io.joern.c2cpg.parser.FileDefaults +import io.joern.c2cpg.utils.TimeUtils +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.passes.ConcurrentWriterCpgPass +import io.joern.x2cpg.SourceFiles + +import java.nio.file.Paths +import java.util.concurrent.ConcurrentHashMap +import java.util.regex.Pattern +import scala.util.matching.Regex + +class AstCreationPass(cpg: Cpg, config: Config) extends ConcurrentWriterCpgPass[String](cpg) { + + private val file2OffsetTable: ConcurrentHashMap[String, Array[Int]] = new ConcurrentHashMap() + private val parser: CdtParser = new CdtParser(config) + + private val EscapedFileSeparator = Pattern.quote(java.io.File.separator) + private val DefaultIgnoredFolders: List[Regex] = List( + "\\..*".r, + s"(.*[$EscapedFileSeparator])?tests?[$EscapedFileSeparator].*".r, + s"(.*[$EscapedFileSeparator])?CMakeFiles[$EscapedFileSeparator].*".r + ) + + override def generateParts(): Array[String] = + SourceFiles + .determine( + config.inputPath, + FileDefaults.HEADER_FILE_EXTENSIONS, + config.withDefaultIgnoredFilesRegex(DefaultIgnoredFolders) + ) + .toArray + + override def runOnPart(diffGraph: DiffGraphBuilder, filename: String): Unit = { + val path = Paths.get(filename).toAbsolutePath + val relPath = SourceFiles.toRelativePath(path.toString, config.inputPath) + val (_, _) = TimeUtils.time { + val parseResult = parser.parse(path) + parseResult match { + case Some(translationUnit) => + val localDiff = + new AstCreator(relPath, config, translationUnit, file2OffsetTable)(config.schemaValidation).createAst() + diffGraph.absorb(localDiff) + true + case None => + false + } + } + } + +} diff --git a/src/main/scala/io/appthreat/atom/frontends/C2Atom.scala b/src/main/scala/io/appthreat/atom/frontends/C2Atom.scala new file mode 100644 index 0000000..48fbc42 --- /dev/null +++ b/src/main/scala/io/appthreat/atom/frontends/C2Atom.scala @@ -0,0 +1,22 @@ +package io.appthreat.atom.frontends + +import io.joern.c2cpg.Config +import io.shiftleft.codepropertygraph.Cpg +import io.shiftleft.codepropertygraph.generated.Languages +import io.joern.x2cpg.passes.frontend.MetaDataPass +import io.joern.x2cpg.X2Cpg.withNewEmptyCpg +import io.joern.x2cpg.X2CpgFrontend +import io.shiftleft.semanticcpg.layers.{LayerCreator, LayerCreatorContext} + +import scala.util.Try + +class C2Atom extends X2CpgFrontend[Config] { + + def createCpg(config: Config): Try[Cpg] = { + withNewEmptyCpg(config.outputPath, config) { (cpg, config) => + new MetaDataPass(cpg, Languages.NEWC, config.inputPath).createAndApply() + new AstCreationPass(cpg, config).createAndApply() + } + } + +} diff --git a/src/main/scala/io/appthreat/atom/frontends/CdtParser.scala b/src/main/scala/io/appthreat/atom/frontends/CdtParser.scala new file mode 100644 index 0000000..d3188db --- /dev/null +++ b/src/main/scala/io/appthreat/atom/frontends/CdtParser.scala @@ -0,0 +1,94 @@ +package io.appthreat.atom.frontends + +import better.files.File +import io.appthreat.atom.frontends.CdtParser.ParseResult +import io.joern.c2cpg.Config +import io.joern.c2cpg.parser.{CustomFileContentProvider, FileDefaults, HeaderFileFinder, ParserConfig} +import io.shiftleft.utils.IOUtils +import org.eclipse.cdt.core.dom.ast.gnu.c.GCCLanguage +import org.eclipse.cdt.core.dom.ast.gnu.cpp.GPPLanguage +import org.eclipse.cdt.core.dom.ast.{IASTPreprocessorStatement, IASTTranslationUnit} +import org.eclipse.cdt.core.model.ILanguage +import org.eclipse.cdt.core.parser.{DefaultLogService, ScannerInfo} +import org.eclipse.cdt.core.parser.FileContent +import org.eclipse.cdt.internal.core.dom.parser.cpp.semantics.CPPVisitor +import org.slf4j.LoggerFactory + +import java.nio.file.{NoSuchFileException, Path} +import scala.jdk.CollectionConverters.* + +object CdtParser { + + private case class ParseResult(translationUnit: Option[IASTTranslationUnit], failure: Option[Throwable] = None) + + def readFileAsFileContent(path: Path): FileContent = { + val lines = IOUtils.readLinesInFile(path).mkString("\n").toArray + FileContent.create(path.toString, true, lines) + } + +} + +class CdtParser(config: Config) { + + import io.joern.c2cpg.parser.CdtParser._ + + private val headerFileFinder = new HeaderFileFinder(config.inputPath) + private val parserConfig = ParserConfig.fromConfig(config) + private val definedSymbols = parserConfig.definedSymbols.asJava + private val includePaths = parserConfig.userIncludePaths + private val log = new DefaultLogService + + // enables parsing of code behind disabled preprocessor defines: + private val opts: Int = ILanguage.OPTION_PARSE_INACTIVE_CODE + + private def createParseLanguage(file: Path): ILanguage = { + if (FileDefaults.isCPPFile(file.toString)) { + GPPLanguage.getDefault + } else { + GCCLanguage.getDefault + } + } + + private def createScannerInfo(file: Path): ScannerInfo = { + val additionalIncludes = + if (FileDefaults.isCPPFile(file.toString)) parserConfig.systemIncludePathsCPP + else parserConfig.systemIncludePathsC + new ScannerInfo(definedSymbols, (includePaths ++ additionalIncludes).map(_.toString).toArray) + } + + private def parseInternal(file: Path): ParseResult = { + val realPath = File(file) + if (realPath.isRegularFile) { // handling potentially broken symlinks + try { + val fileContent = readFileAsFileContent(realPath.path) + val fileContentProvider = new CustomFileContentProvider(headerFileFinder) + val lang = createParseLanguage(realPath.path) + val scannerInfo = createScannerInfo(realPath.path) + val translationUnit = lang.getASTTranslationUnit(fileContent, scannerInfo, fileContentProvider, null, opts, log) + ParseResult(Option(translationUnit)) + } catch { + case u: UnsupportedClassVersionError => + System.exit(1) + ParseResult(None, failure = Option(u)) // return value to make the compiler happy + case e: Throwable => + ParseResult(None, failure = Option(e)) + } + } else { + ParseResult( + None, + failure = Option(new NoSuchFileException(s"File '$realPath' does not exist. Check for broken symlinks!")) + ) + } + } + + def parse(file: Path): Option[IASTTranslationUnit] = { + val parseResult = parseInternal(file) + parseResult match { + case ParseResult(Some(t), _) => + Option(t) + case ParseResult(_, _) => + None + } + } + +} diff --git a/wrapper/nodejs/package-lock.json b/wrapper/nodejs/package-lock.json index eb68283..9a1c189 100644 --- a/wrapper/nodejs/package-lock.json +++ b/wrapper/nodejs/package-lock.json @@ -1,12 +1,12 @@ { "name": "@appthreat/atom", - "version": "1.1.9", + "version": "1.1.10", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@appthreat/atom", - "version": "1.1.9", + "version": "1.1.10", "license": "Apache-2.0", "dependencies": { "@babel/parser": "^7.22.15", diff --git a/wrapper/nodejs/package.json b/wrapper/nodejs/package.json index 4cd7432..a34328d 100644 --- a/wrapper/nodejs/package.json +++ b/wrapper/nodejs/package.json @@ -1,6 +1,6 @@ { "name": "@appthreat/atom", - "version": "1.1.9", + "version": "1.1.10", "description": "Create atom (⚛) representation for your application, packages and libraries", "exports": "./index.js", "type": "module",