Skip to content

Commit

Permalink
Adds a new frontend to parse only header files in c/c++ (#74)
Browse files Browse the repository at this point in the history
Signed-off-by: Prabhu Subramanian <prabhu@appthreat.com>
  • Loading branch information
prabhu authored Sep 9, 2023
1 parent aa25b7a commit 56e616f
Show file tree
Hide file tree
Showing 9 changed files with 192 additions and 9 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Atom (⚛)

Atom is a novel intermediate representation for applications and a standalone tool powered by the [joern](https://joern.io) library. The intermediate representation is optimized for operations typically used for application analytics and machine learning, including [slicing](./specification/docs/slices.md) and [vectoring](./specification/docs/vectors.md).
Atom is a novel intermediate representation for applications and a standalone tool powered by the [joern](https://joern.io) library. The intermediate representation is optimized for operations typically used for application analytics and machine learning, including [slicing](./specification/docs/slices.md) and [vectoring](./specification/docs/vectors.md).

Our vision is to make atom useful for a number of use cases such as:

Expand Down Expand Up @@ -94,6 +94,7 @@ Learn more about [slices](./specification/docs/slices.md) or view some [samples]
## Languages supported

- C/C++ (Requires Java 17 or above)
- H (C/C++ header files only)
- Java (Requires compilation)
- Jar
- Android APK (Requires Android SDK. Set the environment variable `ANDROID_HOME`)
Expand Down
4 changes: 2 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ ThisBuild / organization := "io.appthreat"
ThisBuild / version := "1.0.0"
ThisBuild / scalaVersion := "3.3.0"

val joernVersion = "2.0.74"
val joernVersion = "2.0.81"

lazy val atom = Projects.atom

val astGenVersion = "3.4.0"
val astGenVersion = "3.5.0"

libraryDependencies ++= Seq(
"com.github.pathikrit" %% "better-files" % "3.9.2",
Expand Down
4 changes: 2 additions & 2 deletions log4j2.xml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="WARN">
<Configuration status="ERROR">
<Appenders>
<Console name="Console" target="SYSTEM_ERR">
<PatternLayout pattern="%d{yyy-MM-dd HH:mm:ss.SSS} %p %c{0}: %msg%n"/>
</Console>
</Appenders>
<Loggers>
<Logger name="io.shiftleft.overflowdb" level="warn" />
<Logger name="io.shiftleft.overflowdb" level="ERROR" />
<Root level="ERROR">
<AppenderRef ref="Console" />
</Root>
Expand Down
14 changes: 13 additions & 1 deletion src/main/scala/io/appthreat/atom/Atom.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import io.appthreat.atom.dataflows.{DataFlowGraph, OssDataFlow, OssDataFlowOptio
import io.appthreat.atom.parsedeps.{AtomSlice, parseDependencies}
import io.appthreat.atom.passes.{SafeJSTypeRecoveryPass, TypeHintPass}
import io.appthreat.atom.slicing.*
import io.appthreat.atom.frontends.C2Atom
import io.joern.c2cpg.{C2Cpg, Config as CConfig}
import io.joern.javasrc2cpg.{JavaSrc2Cpg, Config as JavaConfig}
import io.joern.jimple2cpg.{Jimple2Cpg, Config as JimpleConfig}
Expand Down Expand Up @@ -300,10 +301,21 @@ object Atom {
case _ => DEFAULT_ATOM_OUT_FILE

(language match {
case "H" | "HPP" =>
new C2Atom()
.createCpg(
CConfig(includeComments = false, logProblems = false, includePathsAutoDiscovery = false)
.withLogPreprocessor(false)
.withIncludePaths(C2CPG_INCLUDE_PATHS.toSet)
.withInputPath(config.inputPath.pathAsString)
.withOutputPath(outputAtomFile)
.withIgnoredFilesRegex(".*(test|docs|examples|samples|mocks).*")
)
case Languages.C | Languages.NEWC | "CPP" | "C++" =>
new C2Cpg()
.createCpgWithOverlays(
CConfig(includeComments = false, logProblems = false, includePathsAutoDiscovery = true)
CConfig(includeComments = false, logProblems = false, includePathsAutoDiscovery = false)
.withLogPreprocessor(false)
.withIncludePaths(C2CPG_INCLUDE_PATHS.toSet)
.withInputPath(config.inputPath.pathAsString)
.withOutputPath(outputAtomFile)
Expand Down
54 changes: 54 additions & 0 deletions src/main/scala/io/appthreat/atom/frontends/AstCreationPass.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package io.appthreat.atom.frontends

import io.joern.c2cpg.Config
import io.joern.c2cpg.astcreation.AstCreator
import io.joern.c2cpg.parser.FileDefaults
import io.joern.c2cpg.utils.TimeUtils
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.passes.ConcurrentWriterCpgPass
import io.joern.x2cpg.SourceFiles

import java.nio.file.Paths
import java.util.concurrent.ConcurrentHashMap
import java.util.regex.Pattern
import scala.util.matching.Regex

/** Pass that parses the header files found under `config.inputPath` and merges one AST per file into the graph.
  *
  * Each part handed to [[runOnPart]] is the name of a single header file, as produced by [[generateParts]].
  *
  * @param cpg
  *   graph that receives the generated ASTs
  * @param config
  *   c2cpg configuration supplying the input path, ignore rules and schema validation mode
  */
class AstCreationPass(cpg: Cpg, config: Config) extends ConcurrentWriterCpgPass[String](cpg) {

  // One table instance is handed to every AstCreator created by this pass.
  private val file2OffsetTable: ConcurrentHashMap[String, Array[Int]] = new ConcurrentHashMap()
  private val parser: CdtParser                                       = new CdtParser(config)

  // The platform file separator, quoted so it can be embedded in the regexes below.
  private val EscapedFileSeparator = Pattern.quote(java.io.File.separator)

  // Folders skipped by default: dot-prefixed paths, `test`/`tests` folders and `CMakeFiles` folders.
  private val DefaultIgnoredFolders: List[Regex] =
    List(
      "\\..*",
      s"(.*[$EscapedFileSeparator])?tests?[$EscapedFileSeparator].*",
      s"(.*[$EscapedFileSeparator])?CMakeFiles[$EscapedFileSeparator].*"
    ).map(_.r)

  /** Collects the header files below the input path, honouring the default ignored folders.
    *
    * @return
    *   the file names to process, one part per file
    */
  override def generateParts(): Array[String] = {
    val headerFiles = SourceFiles.determine(
      config.inputPath,
      FileDefaults.HEADER_FILE_EXTENSIONS,
      config.withDefaultIgnoredFilesRegex(DefaultIgnoredFolders)
    )
    headerFiles.toArray
  }

  /** Parses a single header file and, when parsing yields a translation unit, absorbs its AST into `diffGraph`.
    *
    * Files that cannot be parsed contribute nothing to the graph.
    *
    * @param diffGraph
    *   builder that collects the changes of this part
    * @param filename
    *   name of the header file to process
    */
  override def runOnPart(diffGraph: DiffGraphBuilder, filename: String): Unit = {
    val absolutePath = Paths.get(filename).toAbsolutePath
    val relativeName = SourceFiles.toRelativePath(absolutePath.toString, config.inputPath)
    // Both the success flag and the measured duration are intentionally discarded.
    val (_, _) = TimeUtils.time {
      parser.parse(absolutePath).fold(false) { translationUnit =>
        val fileDiff =
          new AstCreator(relativeName, config, translationUnit, file2OffsetTable)(config.schemaValidation).createAst()
        diffGraph.absorb(fileDiff)
        true
      }
    }
  }

}
22 changes: 22 additions & 0 deletions src/main/scala/io/appthreat/atom/frontends/C2Atom.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package io.appthreat.atom.frontends

import io.joern.c2cpg.Config
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.Languages
import io.joern.x2cpg.passes.frontend.MetaDataPass
import io.joern.x2cpg.X2Cpg.withNewEmptyCpg
import io.joern.x2cpg.X2CpgFrontend
import io.shiftleft.semanticcpg.layers.{LayerCreator, LayerCreatorContext}

import scala.util.Try

/** Frontend that creates a graph from C/C++ header files.
  *
  * Only two passes are run: the metadata pass and this package's [[AstCreationPass]].
  */
class C2Atom extends X2CpgFrontend[Config] {

  /** Creates a new, empty graph at `config.outputPath` and populates it from `config.inputPath`.
    *
    * @param config
    *   c2cpg configuration providing the input and output paths
    * @return
    *   the populated graph, or the failure reported by `withNewEmptyCpg`
    */
  def createCpg(config: Config): Try[Cpg] =
    withNewEmptyCpg(config.outputPath, config) { (graph, frontendConfig) =>
      // The metadata pass records the language (NEWC) and the input path.
      new MetaDataPass(graph, Languages.NEWC, frontendConfig.inputPath).createAndApply()
      new AstCreationPass(graph, frontendConfig).createAndApply()
    }

}
94 changes: 94 additions & 0 deletions src/main/scala/io/appthreat/atom/frontends/CdtParser.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package io.appthreat.atom.frontends

import better.files.File
import io.appthreat.atom.frontends.CdtParser.ParseResult
import io.joern.c2cpg.Config
import io.joern.c2cpg.parser.{CustomFileContentProvider, FileDefaults, HeaderFileFinder, ParserConfig}
import io.shiftleft.utils.IOUtils
import org.eclipse.cdt.core.dom.ast.gnu.c.GCCLanguage
import org.eclipse.cdt.core.dom.ast.gnu.cpp.GPPLanguage
import org.eclipse.cdt.core.dom.ast.{IASTPreprocessorStatement, IASTTranslationUnit}
import org.eclipse.cdt.core.model.ILanguage
import org.eclipse.cdt.core.parser.{DefaultLogService, ScannerInfo}
import org.eclipse.cdt.core.parser.FileContent
import org.eclipse.cdt.internal.core.dom.parser.cpp.semantics.CPPVisitor
import org.slf4j.LoggerFactory

import java.nio.file.{NoSuchFileException, Path}
import scala.jdk.CollectionConverters.*

/** Companion of the header-file parser: holds the internal parse outcome type and a file loading helper. */
object CdtParser {

  /** Outcome of one parse attempt.
    *
    * @param translationUnit
    *   the parsed translation unit, if parsing produced one
    * @param failure
    *   the throwable that prevented parsing, if any
    */
  private case class ParseResult(translationUnit: Option[IASTTranslationUnit], failure: Option[Throwable] = None)

  /** Reads the file at `path` and wraps its text in a CDT [[FileContent]].
    *
    * The lines of the file are re-joined with `"\n"`, so the resulting text has no trailing newline.
    *
    * @param path
    *   file to read
    * @return
    *   the file content, registered under `path.toString`
    */
  def readFileAsFileContent(path: Path): FileContent = {
    val joinedText = IOUtils.readLinesInFile(path).mkString("\n")
    // NOTE(review): the boolean presumably marks the content as a source file — confirm against the CDT API.
    FileContent.create(path.toString, true, joinedText.toCharArray)
  }

}

/** Parses single C/C++ files into Eclipse CDT translation units.
  *
  * Failures never propagate to the caller: [[parse]] returns `None` for any file that could not be parsed and logs
  * the reason at debug level. The only exception is an `UnsupportedClassVersionError`, which is logged and then
  * terminates the JVM with exit code 1.
  *
  * @param config
  *   c2cpg configuration from which the input path, defined symbols and include paths are derived
  */
class CdtParser(config: Config) {

  // NOTE(review): `readFileAsFileContent` used below appears to resolve through this wildcard import (joern's
  // companion), not through the helper of the same name in this file's companion object — confirm this is intended.
  import io.joern.c2cpg.parser.CdtParser._

  // slf4j logger for diagnostics of this class; `log` below is the log service handed to CDT itself.
  private val logger = LoggerFactory.getLogger(classOf[CdtParser])

  private val headerFileFinder = new HeaderFileFinder(config.inputPath)
  private val parserConfig     = ParserConfig.fromConfig(config)
  private val definedSymbols   = parserConfig.definedSymbols.asJava
  private val includePaths     = parserConfig.userIncludePaths
  private val log              = new DefaultLogService

  // enables parsing of code behind disabled preprocessor defines:
  private val opts: Int = ILanguage.OPTION_PARSE_INACTIVE_CODE

  /** Chooses the C++ language for files recognised as C++ and the C language for everything else. */
  private def createParseLanguage(file: Path): ILanguage = {
    if (FileDefaults.isCPPFile(file.toString)) {
      GPPLanguage.getDefault
    } else {
      GCCLanguage.getDefault
    }
  }

  /** Builds the scanner info for `file`: the defined symbols plus the user include paths followed by the system
    * include paths matching the file's language.
    */
  private def createScannerInfo(file: Path): ScannerInfo = {
    val additionalIncludes =
      if (FileDefaults.isCPPFile(file.toString)) parserConfig.systemIncludePathsCPP
      else parserConfig.systemIncludePathsC
    new ScannerInfo(definedSymbols, (includePaths ++ additionalIncludes).map(_.toString).toArray)
  }

  /** Parses `file` and captures either the translation unit or the reason why none could be produced. */
  private def parseInternal(file: Path): ParseResult = {
    val realPath = File(file)
    if (realPath.isRegularFile) { // handling potentially broken symlinks
      try {
        val fileContent         = readFileAsFileContent(realPath.path)
        val fileContentProvider = new CustomFileContentProvider(headerFileFinder)
        val lang                = createParseLanguage(realPath.path)
        val scannerInfo         = createScannerInfo(realPath.path)
        val translationUnit = lang.getASTTranslationUnit(fileContent, scannerInfo, fileContentProvider, null, opts, log)
        // Option(...) turns a null translation unit into None.
        ParseResult(Option(translationUnit))
      } catch {
        case u: UnsupportedClassVersionError =>
          // Tell the user why the process stops instead of exiting silently.
          logger.error("Parsing C/C++ requires Java 17 or above. Please check your Java Runtime Environment!", u)
          sys.exit(1)
        case e: Throwable =>
          // Kept deliberately broad: whatever goes wrong for one file is reported as a failed result.
          ParseResult(None, failure = Option(e))
      }
    } else {
      ParseResult(
        None,
        failure = Option(new NoSuchFileException(s"File '$realPath' does not exist. Check for broken symlinks!"))
      )
    }
  }

  /** Parses the given file.
    *
    * @param file
    *   path of the file to parse
    * @return
    *   the translation unit, or `None` if the file is not a regular file or could not be parsed
    */
  def parse(file: Path): Option[IASTTranslationUnit] = {
    val result = parseInternal(file)
    // Surface the otherwise invisible failure reason; debug level keeps normal runs quiet.
    result.failure.foreach(e => logger.debug(s"Failed to parse '$file': ${e.getMessage}", e))
    result.translationUnit
  }

}
4 changes: 2 additions & 2 deletions wrapper/nodejs/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion wrapper/nodejs/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@appthreat/atom",
"version": "1.1.9",
"version": "1.1.10",
"description": "Create atom (⚛) representation for your application, packages and libraries",
"exports": "./index.js",
"type": "module",
Expand Down

0 comments on commit 56e616f

Please sign in to comment.