diff --git a/.github/workflows/containers.yml b/.github/workflows/containers.yml index 94891fa..aca891d 100644 --- a/.github/workflows/containers.yml +++ b/.github/workflows/containers.yml @@ -67,3 +67,21 @@ jobs: labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha,scope=chen cache-to: type=gha,mode=max,scope=chen + - name: Extract metadata (tags, labels) for Docker + id: meta2 + uses: docker/metadata-action@v4 + with: + images: | + ghcr.io/appthreat/chen-note + + - name: Build and push Docker images + uses: docker/build-push-action@v4 + with: + context: . + file: ci/Dockerfile-note + platforms: linux/amd64,linux/arm64 + push: true + tags: ${{ steps.meta2.outputs.tags }} + labels: ${{ steps.meta2.outputs.labels }} + cache-from: type=gha,scope=chen-note + cache-to: type=gha,mode=max,scope=chen-note diff --git a/README.md b/README.md index 8500cba..e0d6915 100644 --- a/README.md +++ b/README.md @@ -1 +1,3 @@ Code Hierarchy Exploration Net + + diff --git a/build.sbt b/build.sbt index a2d3d25..6d74c32 100644 --- a/build.sbt +++ b/build.sbt @@ -1,6 +1,6 @@ name := "chen" ThisBuild / organization := "io.appthreat" -ThisBuild / version := "0.0.3" +ThisBuild / version := "0.0.4" ThisBuild / scalaVersion := "3.3.0" val cpgVersion = "1.4.22" diff --git a/ci/Dockerfile b/ci/Dockerfile index e2520a7..829e6e9 100644 --- a/ci/Dockerfile +++ b/ci/Dockerfile @@ -25,9 +25,15 @@ ENV JAVA_VERSION=$JAVA_VERSION \ PYTHON_CMD=python3 \ PYTHONUNBUFFERED=1 \ PYTHONIOENCODING="utf-8" \ + JAVA_OPTS="-XX:+UseG1GC -XX:+ExplicitGCInvokesConcurrent -XX:+ParallelRefProcEnabled -XX:+UseStringDeduplication -XX:+UnlockExperimentalVMOptions -XX:G1NewSizePercent=20 -XX:+UnlockDiagnosticVMOptions -XX:G1SummarizeRSetStatsPeriod=1" \ + JOERN_DATAFLOW_TRACKED_WIDTH=128 \ + SCALAPY_PYTHON_LIBRARY=python3.11 \ ANDROID_HOME=/opt/android-sdk-linux ENV 
PATH=${PATH}:/opt/platform:${JAVA_HOME}/bin:${MAVEN_HOME}/bin:${GRADLE_HOME}/bin:/usr/local/bin/:/root/.local/bin:${ANDROID_HOME}/cmdline-tools/latest/bin:${ANDROID_HOME}/tools:${ANDROID_HOME}/tools/bin:${ANDROID_HOME}/platform-tools: +WORKDIR /opt +COPY ./ci/requirements.txt /opt/ +COPY ./ci/kernel.json /opt/ RUN set -e; \ ARCH_NAME="$(rpm --eval '%{_arch}')"; \ url=; \ @@ -44,10 +50,17 @@ RUN set -e; \ esac; \ echo -e "[nodejs]\nname=nodejs\nstream=20\nprofiles=\nstate=enabled\n" > /etc/dnf/modules.d/nodejs.module \ && microdnf install -y gcc git-core \ - python3.11 python3.11-devel python3.11-pip \ + python3.11 python3.11-devel python3.11-pip graphviz graphviz-gd graphviz-python3 \ pcre2 findutils which tar gzip zip unzip sudo nodejs ncurses sqlite-devel \ && alternatives --install /usr/bin/python3 python /usr/bin/python3.11 1 \ && python3 --version \ + && curl -LO https://repo.almalinux.org/almalinux/9/CRB/${ARCH_NAME}/os/Packages/graphviz-devel-2.44.0-25.el9.${ARCH_NAME}.rpm \ + && rpm -ivh graphviz-devel-2.44.0-25.el9.${ARCH_NAME}.rpm \ + && rm graphviz-devel-2.44.0-25.el9.${ARCH_NAME}.rpm \ + && python3.11 -m pip install -r /opt/requirements.txt notebook owasp-depscan \ + && python3.11 -m pip install virtualenv numpy GitPython pandas graphviz pydotplus networkx[default,extra] \ + && python3.11 -m pip install torch --index-url https://download.pytorch.org/whl/cpu \ + && python3.11 -m pip install torch_geometric pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cpu.html \ && curl -s "https://get.sdkman.io" | bash \ && source "$HOME/.sdkman/bin/sdkman-init.sh" \ && echo -e "sdkman_auto_answer=true\nsdkman_selfupdate_feature=false\nsdkman_auto_env=true" >> $HOME/.sdkman/etc/config \ @@ -67,12 +80,16 @@ RUN set -e; \ && /opt/android-sdk-linux/cmdline-tools/latest/bin/sdkmanager 'platform-tools' --sdk_root=/opt/android-sdk-linux \ && /opt/android-sdk-linux/cmdline-tools/latest/bin/sdkmanager 
'platforms;android-33' --sdk_root=/opt/android-sdk-linux \
     && /opt/android-sdk-linux/cmdline-tools/latest/bin/sdkmanager 'build-tools;33.0.0' --sdk_root=/opt/android-sdk-linux \
-    && mkdir -p /opt
-WORKDIR /opt
+    && curl -Lo coursier https://git.io/coursier-cli \
+    && chmod +x coursier \
+    && ./coursier launch almond:0.14.0-RC13 --scala 3.3.0 -- --install \
+    && cp /opt/kernel.json /root/.local/share/jupyter/kernels/scala/ \
+    && sudo npm install -g @cyclonedx/cdxgen --omit=optional \
+    && rm -f coursier
 COPY ./target/chen.zip .
-RUN cd /opt/ \
-    && unzip -q chen.zip \
-    && rm chen.zip \
+COPY ./notebooks /opt/notebooks
+RUN unzip -q chen.zip \
+    && rm chen.zip kernel.json requirements.txt \
     && microdnf clean all
-
+ENV CLASSPATH=$CLASSPATH:/opt/platform/lib/*:
 CMD ["chennai"]
diff --git a/ci/Dockerfile-note b/ci/Dockerfile-note
new file mode 100644
index 0000000..4bf7621
--- /dev/null
+++ b/ci/Dockerfile-note
@@ -0,0 +1,98 @@
+FROM almalinux:9.2-minimal
+
+LABEL maintainer="appthreat" \
+      org.opencontainers.image.authors="Team AppThreat " \
+      org.opencontainers.image.source="https://github.com/appthreat/chen" \
+      org.opencontainers.image.url="https://github.com/appthreat/chen" \
+      org.opencontainers.image.version="1.0.0" \
+      org.opencontainers.image.vendor="appthreat" \
+      org.opencontainers.image.licenses="Apache-2.0" \
+      org.opencontainers.image.title="chen" \
+      org.opencontainers.image.description="Container image for AppThreat chen code analysis platform" \
+      org.opencontainers.docker.cmd="docker run --rm -it -v /tmp:/tmp -p 8192:8192 -v $(pwd):/opt/notebooks:rw -t ghcr.io/appthreat/chen-note --config /opt/config.yml"
+
+ARG JAVA_VERSION=20.0.2-graalce
+ARG MAVEN_VERSION=3.9.3
+ARG GRADLE_VERSION=8.2.1
+ARG POLYNOTE_VERSION="0.5.1"
+ARG SCALA_VERSION="2.12"
+ARG DIST_TAR="polynote-dist.tar.gz"
+
+ENV JAVA_VERSION=$JAVA_VERSION \
+    MAVEN_VERSION=$MAVEN_VERSION \
+    GRADLE_VERSION=$GRADLE_VERSION \
+    GRADLE_OPTS="-Dorg.gradle.daemon=false" \
+
JAVA_HOME="/opt/java/${JAVA_VERSION}" \ + MAVEN_HOME="/opt/maven/${MAVEN_VERSION}" \ + GRADLE_HOME="/opt/gradle/${GRADLE_VERSION}" \ + POLYNOTE_SCALA_VERSION=${SCALA_VERSION} \ + POLYNOTE_VERSION=${POLYNOTE_VERSION} \ + PYTHON_CMD=python3 \ + PYTHONUNBUFFERED=1 \ + PYTHONIOENCODING="utf-8" \ + JAVA_OPTS="-XX:+UseG1GC -XX:+ExplicitGCInvokesConcurrent -XX:+ParallelRefProcEnabled -XX:+UseStringDeduplication -XX:+UnlockExperimentalVMOptions -XX:G1NewSizePercent=20 -XX:+UnlockDiagnosticVMOptions -XX:G1SummarizeRSetStatsPeriod=1" \ + JOERN_DATAFLOW_TRACKED_WIDTH=128 \ + SCALAPY_PYTHON_LIBRARY=python3.11 \ + ANDROID_HOME=/opt/android-sdk-linux +ENV PATH=${PATH}:/opt/platform:${JAVA_HOME}/bin:${MAVEN_HOME}/bin:${GRADLE_HOME}/bin:/usr/local/bin/:/root/.local/bin:${ANDROID_HOME}/cmdline-tools/latest/bin:${ANDROID_HOME}/tools:${ANDROID_HOME}/tools/bin:${ANDROID_HOME}/platform-tools: + +WORKDIR /opt +RUN set -e; \ + ARCH_NAME="$(rpm --eval '%{_arch}')"; \ + url=; \ + case "${ARCH_NAME##*-}" in \ + 'x86_64') \ + OS_ARCH_SUFFIX=''; \ + GOBIN_VERSION='amd64'; \ + ;; \ + 'aarch64') \ + OS_ARCH_SUFFIX='-aarch64'; \ + GOBIN_VERSION='arm64'; \ + ;; \ + *) echo >&2 "error: unsupported architecture: '$ARCH_NAME'"; exit 1 ;; \ + esac; \ + echo -e "[nodejs]\nname=nodejs\nstream=20\nprofiles=\nstate=enabled\n" > /etc/dnf/modules.d/nodejs.module \ + && microdnf install -y gcc git-core wget \ + python3.11 python3.11-devel python3.11-pip graphviz graphviz-gd graphviz-python3 \ + pcre2 findutils which tar gzip zip unzip sudo nodejs ncurses sqlite-devel \ + && alternatives --install /usr/bin/python3 python /usr/bin/python3.11 1 \ + && python3 --version \ + && curl -s "https://get.sdkman.io" | bash \ + && source "$HOME/.sdkman/bin/sdkman-init.sh" \ + && echo -e "sdkman_auto_answer=true\nsdkman_selfupdate_feature=false\nsdkman_auto_env=true" >> $HOME/.sdkman/etc/config \ + && sdk install java $JAVA_VERSION \ + && sdk install maven $MAVEN_VERSION \ + && sdk install gradle $GRADLE_VERSION \ + && 
sdk offline enable \ + && mv /root/.sdkman/candidates/* /opt/ \ + && rm -rf /root/.sdkman \ + && curl -LO https://repo.almalinux.org/almalinux/9/CRB/${ARCH_NAME}/os/Packages/graphviz-devel-2.44.0-25.el9.${ARCH_NAME}.rpm \ + && rpm -ivh graphviz-devel-2.44.0-25.el9.${ARCH_NAME}.rpm \ + && rm graphviz-devel-2.44.0-25.el9.${ARCH_NAME}.rpm \ + && python3.11 -m pip install owasp-depscan virtualenv ipython nbconvert numpy jedi jep GitPython pandas matplotlib graphviz pydotplus networkx[default,extra] \ + && python3.11 -m pip install torch --index-url https://download.pytorch.org/whl/cpu \ + && python3.11 -m pip install torch_geometric pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cpu.html \ + && wget -q https://github.com/polynote/polynote/releases/download/$POLYNOTE_VERSION/$DIST_TAR \ + && tar xfzp $DIST_TAR \ + && echo "DIST_TAR=$DIST_TAR" \ + && rm $DIST_TAR \ + && useradd -ms /bin/bash polly \ + && microdnf install -y epel-release \ + && mkdir -p ${ANDROID_HOME}/cmdline-tools \ + && curl -L https://dl.google.com/android/repository/commandlinetools-linux-9477386_latest.zip -o ${ANDROID_HOME}/cmdline-tools/android_tools.zip \ + && unzip ${ANDROID_HOME}/cmdline-tools/android_tools.zip -d ${ANDROID_HOME}/cmdline-tools/ \ + && rm ${ANDROID_HOME}/cmdline-tools/android_tools.zip \ + && mv ${ANDROID_HOME}/cmdline-tools/cmdline-tools ${ANDROID_HOME}/cmdline-tools/latest \ + && yes | /opt/android-sdk-linux/cmdline-tools/latest/bin/sdkmanager --licenses --sdk_root=/opt/android-sdk-linux \ + && /opt/android-sdk-linux/cmdline-tools/latest/bin/sdkmanager 'platform-tools' --sdk_root=/opt/android-sdk-linux \ + && /opt/android-sdk-linux/cmdline-tools/latest/bin/sdkmanager 'platforms;android-33' --sdk_root=/opt/android-sdk-linux \ + && /opt/android-sdk-linux/cmdline-tools/latest/bin/sdkmanager 'build-tools;33.0.0' --sdk_root=/opt/android-sdk-linux \ + && sudo npm install -g @cyclonedx/cdxgen --omit=optional +COPY 
./target/chen.zip .
+COPY ./notebooks /opt/notebooks
+COPY ./ci/config.yml /opt/config.yml
+RUN unzip -q chen.zip \
+    && chown -R polly:polly /opt \
+    && microdnf clean all
+EXPOSE 8192
+ENTRYPOINT ["/opt/polynote/polynote.py"]
diff --git a/ci/config.yml b/ci/config.yml
new file mode 100644
index 0000000..bdb800e
--- /dev/null
+++ b/ci/config.yml
@@ -0,0 +1,14 @@
+listen:
+  host: 0.0.0.0
+
+storage:
+  dir: /opt/notebooks
+
+dependencies:
+  python:
+    - GitPython
+    - pandas
+    - matplotlib
+    - graphviz
+    - pydotplus
+    - networkx[default,extra]
diff --git a/ci/kernel.json b/ci/kernel.json
new file mode 100644
index 0000000..3e5f510
--- /dev/null
+++ b/ci/kernel.json
@@ -0,0 +1,13 @@
+{
+  "argv": [
+    "java",
+    "-Djna.library.path=/usr/lib64",
+    "-cp",
+    "/root/.local/share/jupyter/kernels/scala/launcher.jar:/opt/platform/lib/*",
+    "coursier.bootstrap.launcher.Launcher",
+    "--connection-file",
+    "{connection_file}"
+  ],
+  "display_name": "Scala",
+  "language": "scala"
+}
diff --git a/ci/requirements.txt b/ci/requirements.txt
new file mode 100644
index 0000000..995d43f
--- /dev/null
+++ b/ci/requirements.txt
@@ -0,0 +1,51 @@
+asttokens==2.4.0
+attrs==23.1.0
+backcall==0.2.0
+beautifulsoup4==4.12.2
+bleach==6.0.0
+comm==0.1.4
+debugpy==1.8.0
+decorator==5.1.1
+defusedxml==0.7.1
+executing==1.2.0
+fastjsonschema==2.18.0
+ipykernel==6.25.2
+ipython==8.15.0
+jedi==0.19.0
+Jinja2==3.1.2
+jsonschema==4.19.0
+jsonschema-specifications==2023.7.1
+jupyter-console==6.6.3
+jupyter_client==8.3.1
+jupyter_core==5.3.1
+jupyterlab-pygments==0.2.2
+MarkupSafe==2.1.3
+matplotlib-inline==0.1.6
+mistune==3.0.1
+nbclient==0.8.0
+nbconvert==7.8.0
+nbformat==5.9.2
+nest-asyncio==1.5.8
+packaging==23.1
+pandocfilters==1.5.0
+parso==0.8.3
+pexpect==4.8.0
+pickleshare==0.7.5
+platformdirs==3.10.0
+prompt-toolkit==3.0.39
+psutil==5.9.5
+ptyprocess==0.7.0
+pure-eval==0.2.2
+Pygments==2.16.1
+python-dateutil==2.8.2
+pyzmq==25.1.1
+referencing==0.30.2
+rpds-py==0.10.3
+six==1.16.0
+soupsieve==2.5
+stack-data==0.6.2 +tinycss2==1.2.1 +tornado==6.3.3 +traitlets==5.10.0 +wcwidth==0.2.6 +webencodings==0.5.1 \ No newline at end of file diff --git a/console/build.sbt b/console/build.sbt index e0c737a..53a9f04 100644 --- a/console/build.sbt +++ b/console/build.sbt @@ -27,11 +27,23 @@ libraryDependencies ++= Seq( "com.lihaoyi" %% "os-lib" % "0.9.1", "com.lihaoyi" %% "pprint" % "0.7.3", "com.lihaoyi" %% "cask" % CaskVersion, + "me.shadaj" %% "scalapy-core" % "0.5.2", "org.scalatest" %% "scalatest" % Versions.scalatest % Test ) + Test / compile := (Test / compile).dependsOn((Projects.c2cpg / stage)).value +import ai.kien.python.Python + +lazy val python = Python() + +lazy val javaOpts = python.scalapyProperties.get.map { + case (k, v) => s"""-D$k=$v""" +}.toSeq + +javaOptions ++= javaOpts + githubOwner := "appthreat" githubRepository := "chen" credentials += diff --git a/console/src/main/scala/io/appthreat/console/BridgeBase.scala b/console/src/main/scala/io/appthreat/console/BridgeBase.scala index d32d023..4735c21 100644 --- a/console/src/main/scala/io/appthreat/console/BridgeBase.scala +++ b/console/src/main/scala/io/appthreat/console/BridgeBase.scala @@ -136,19 +136,19 @@ trait BridgeBase extends InteractiveShell with ScriptExecution with PluginHandli opt[String]("server-host") .action((x, c) => c.copy(serverHost = x)) - .text("Hostname on which to expose the CPGQL server") + .text("Hostname on which to expose the Chen server") opt[Int]("server-port") .action((x, c) => c.copy(serverPort = x)) - .text("Port on which to expose the CPGQL server") + .text("Port on which to expose the Chen server") opt[String]("server-auth-username") .action((x, c) => c.copy(serverAuthUsername = Option(x))) - .text("Basic auth username for the CPGQL server") + .text("Basic auth username for the Chen server") opt[String]("server-auth-password") .action((x, c) => c.copy(serverAuthPassword = Option(x))) - .text("Basic auth password for the CPGQL server") + .text("Basic auth password for 
the Chen server") note("Misc") @@ -181,7 +181,7 @@ trait BridgeBase extends InteractiveShell with ScriptExecution with PluginHandli parser.parse(args, Config()).get } - /** Entry point for Joern's integrated REPL and plugin manager */ + /** Entry point for Chen's integrated REPL and plugin manager */ protected def run(config: Config): Unit = { if (config.listPlugins) { printPluginsAndLayerCreators(config) @@ -206,7 +206,7 @@ trait BridgeBase extends InteractiveShell with ScriptExecution with PluginHandli } protected def createPredefFile(additionalLines: Seq[String] = Nil): Path = { - val tmpFile = Files.createTempFile("joern-predef", "sc") + val tmpFile = Files.createTempFile("chen-predef", "sc") Files.write(tmpFile, (predefLines ++ additionalLines).asJava) tmpFile.toAbsolutePath } diff --git a/console/src/main/scala/io/appthreat/console/Console.scala b/console/src/main/scala/io/appthreat/console/Console.scala index 8cd3540..ff04264 100644 --- a/console/src/main/scala/io/appthreat/console/Console.scala +++ b/console/src/main/scala/io/appthreat/console/Console.scala @@ -15,6 +15,7 @@ import overflowdb.traversal.help.Doc import scala.sys.process.Process import scala.util.control.NoStackTrace import scala.util.{Failure, Success, Try} +import scala.collection.mutable.ListBuffer class Console[T <: Project](loader: WorkspaceLoader[T], baseDir: File = File.currentWorkingDirectory) extends Reporting { @@ -132,8 +133,8 @@ class Console[T <: Project](loader: WorkspaceLoader[T], baseDir: File = File.cur |""", example = "cpg.method.l" ) - implicit def cpg: Cpg = workspace.cpg - implicit def atom: Cpg = workspace.cpg + implicit def cpg: Cpg = workspace.cpg + def atom: Cpg = workspace.cpg /** All cpgs loaded in the workspace */ diff --git a/console/src/main/scala/io/appthreat/console/ConsoleConfig.scala b/console/src/main/scala/io/appthreat/console/ConsoleConfig.scala index a306ead..9fc3ebe 100644 --- a/console/src/main/scala/io/appthreat/console/ConsoleConfig.scala +++ 
b/console/src/main/scala/io/appthreat/console/ConsoleConfig.scala @@ -21,8 +21,8 @@ class InstallConfig(environment: Map[String, String] = sys.env) { * - running a unit/integration test (note: the jars would be in the local cache, e.g. in ~/.coursier/cache) */ lazy val rootPath: File = { - if (environment.contains("SHIFTLEFT_OCULAR_INSTALL_DIR")) { - environment("SHIFTLEFT_OCULAR_INSTALL_DIR").toFile + if (environment.contains("CHEN_INSTALL_DIR")) { + environment("CHEN_INSTALL_DIR").toFile } else { val uriToLibDir = classOf[InstallConfig].getProtectionDomain.getCodeSource.getLocation.toURI val pathToLibDir = File(uriToLibDir).parent diff --git a/console/src/test/scala/io/appthreat/console/ConsoleConfigTest.scala b/console/src/test/scala/io/appthreat/console/ConsoleConfigTest.scala index 9454643..fbc0eb2 100644 --- a/console/src/test/scala/io/appthreat/console/ConsoleConfigTest.scala +++ b/console/src/test/scala/io/appthreat/console/ConsoleConfigTest.scala @@ -12,8 +12,8 @@ class ConsoleConfigTest extends AnyWordSpec with Matchers { config.rootPath shouldBe ProjectRoot.find } - "set the rootPath to SHIFTLEFT_OCULAR_INSTALL_DIR if it is defined" in { - val config = new InstallConfig(environment = Map("SHIFTLEFT_OCULAR_INSTALL_DIR" -> "/tmp")) + "set the rootPath to CHEN_INSTALL_DIR if it is defined" in { + val config = new InstallConfig(environment = Map("CHEN_INSTALL_DIR" -> "/tmp")) config.rootPath shouldBe File("/tmp") } diff --git a/console/src/test/scala/io/appthreat/console/testing/ConsoleFixture.scala b/console/src/test/scala/io/appthreat/console/testing/ConsoleFixture.scala index 7f0b85e..8e6de97 100644 --- a/console/src/test/scala/io/appthreat/console/testing/ConsoleFixture.scala +++ b/console/src/test/scala/io/appthreat/console/testing/ConsoleFixture.scala @@ -40,7 +40,7 @@ object TestWorkspaceLoader extends WorkspaceLoader[Project] { class TestConsole(workspaceDir: String) extends Console[Project](TestWorkspaceLoader, File(workspaceDir)) { override def 
config = - new ConsoleConfig(install = new InstallConfig(Map("SHIFTLEFT_OCULAR_INSTALL_DIR" -> workspaceDir))) + new ConsoleConfig(install = new InstallConfig(Map("CHEN_INSTALL_DIR" -> workspaceDir))) override def importCode: ImportCode[Project] = new ImportCode(this) { override val generatorFactory = new TestCpgGeneratorFactory(config) diff --git a/notebooks/README.md b/notebooks/README.md new file mode 100644 index 0000000..e69de29 diff --git a/platform/frontends/c2cpg/README.md b/platform/frontends/c2cpg/README.md index 75ffb15..bd4cb37 100644 --- a/platform/frontends/c2cpg/README.md +++ b/platform/frontends/c2cpg/README.md @@ -1,5 +1,3 @@ -[![CI pipeline](https://github.com/ShiftLeftSecurity/codepropertygraph/actions/workflows/push.yml/badge.svg)](https://github.com/ShiftLeftSecurity/codepropertygraph/actions/workflows/push.yml) - # c2cpg An [Eclipse CDT](https://wiki.eclipse.org/CDT/designs/Overview_of_Parsing) based parser for C/C++ that creates code property graphs according to the specification at https://github.com/ShiftLeftSecurity/codepropertygraph . @@ -10,7 +8,7 @@ The build process has been verified on Linux, and it should be possible to build on OS X and BSD systems as well. 
The build process requires the following prerequisites: -* Java runtime 11 +* Java runtime 17 - Link: http://openjdk.java.net/install/ * Scala build tool (sbt) - Link: https://www.scala-sbt.org/ @@ -38,45 +36,3 @@ Run the following to see a complete list of available options: ```shell script ./c2cpg.sh --help ``` - -## Parser Benchmarks - -When run on [the linux kernel sources](https://github.com/torvalds/linux) we got: - -``` -Overall time: 43 sec for 51805 file(s) -Total parse failures: 0 -Number of files with problems: 32940 // those are files with unsupported C or C++ statements (see below) or unresolvable includes -``` - -on this machine: - -``` -Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz -RAM 32,0 GB (24 GB for the JVM) -Samsung SSD 970 EVO Plus 1TB -``` - -Includes for the linux kernel are partly unresolvable. These are files actually not contained in the linux GitHub repository, e.g., stuff in: - - `drivers/` for `gpu/` and/or `drm/` - - `fs/` for specific filesystems like `xfs/` - -On a smaller system (`Intel(R) Core(TM) i7-8665U CPU @ 1.90GHz, 16GB RAM`) with `-Xmx8GB` we got: - -``` -Overall time: 2.082 min for 51805 file(s) -Total parse failures: 0 -Number of files with problems: 32940 -``` - -## Dealing with Parser problems: -(copied from [here](https://wiki.eclipse.org/CDT/designs/Overview_of_Parsing)) - -In case a syntax error is encountered in the source a _problem node_ will be generated in the AST. There are four types of problem node: - - `IASTProblemDeclaration` - - `IASTProblemExpression` - - `IASTProblemStatement` - - `IASTProblemTypeId` - -The parser is usually capable of recovering from most syntax errors, generating a problem node and resuming the parse. -Problem nodes may also be generated by the preprocessor, for example if a macro is not used properly or if an included file cannot be found. 
diff --git a/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/Main.scala b/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/Main.scala index 18e1b10..0089303 100644 --- a/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/Main.scala +++ b/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/Main.scala @@ -8,7 +8,9 @@ import scopt.OParser import scala.util.control.NonFatal final case class Config( + includeFiles: Set[String] = Set.empty, includePaths: Set[String] = Set.empty, + macroFiles: Set[String] = Set.empty, defines: Set[String] = Set.empty, includeComments: Boolean = false, logProblems: Boolean = false, @@ -19,10 +21,16 @@ final case class Config( includeImageLocations: Boolean = false, useProjectIndex: Boolean = true ) extends X2CpgConfig[Config] { + def withIncludeFiles(includeFiles: Set[String]): Config = { + this.copy(includeFiles = includeFiles).withInheritedFields(this) + } def withIncludePaths(includePaths: Set[String]): Config = { this.copy(includePaths = includePaths).withInheritedFields(this) } + def withMacroFiles(macroFiles: Set[String]): Config = { + this.copy(macroFiles = macroFiles).withInheritedFields(this) + } def withDefines(defines: Set[String]): Config = { this.copy(defines = defines).withInheritedFields(this) } @@ -84,6 +92,14 @@ private object Frontend { .unbounded() .text("header include paths") .action((incl, c) => c.withIncludePaths(c.includePaths + incl)), + opt[String]("include-files") + .unbounded() + .text("header include files") + .action((inclf, c) => c.withIncludeFiles(c.includeFiles + inclf)), + opt[String]("macro-files") + .unbounded() + .text("macro files") + .action((macrof, c) => c.withMacroFiles(c.macroFiles + macrof)), opt[Unit]("no-include-auto-discovery") .text("disables auto discovery of system header include paths") .hidden(), diff --git a/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/astcreation/AstCreatorHelper.scala 
b/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/astcreation/AstCreatorHelper.scala index a1da70f..686496b 100644 --- a/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/astcreation/AstCreatorHelper.scala +++ b/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/astcreation/AstCreatorHelper.scala @@ -10,7 +10,6 @@ import io.shiftleft.utils.IOUtils import org.apache.commons.lang.StringUtils import org.eclipse.cdt.core.dom.ast.* import org.eclipse.cdt.core.dom.ast.c.{ICASTArrayDesignator, ICASTDesignatedInitializer, ICASTFieldDesignator} -import org.eclipse.cdt.core.dom.ast.c.ICASTTypedefNameSpecifier import org.eclipse.cdt.core.dom.ast.cpp.* import org.eclipse.cdt.core.dom.ast.gnu.c.ICASTKnRFunctionDeclarator import org.eclipse.cdt.internal.core.dom.parser.c.CASTArrayRangeDesignator diff --git a/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/parser/CdtParser.scala b/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/parser/CdtParser.scala index 1de46b9..e52d581 100644 --- a/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/parser/CdtParser.scala +++ b/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/parser/CdtParser.scala @@ -9,8 +9,7 @@ import org.eclipse.cdt.core.dom.ast.gnu.cpp.GPPLanguage import org.eclipse.cdt.core.dom.ast.{IASTPreprocessorStatement, IASTTranslationUnit} import org.eclipse.cdt.core.index.IIndex import org.eclipse.cdt.core.model.{CoreModel, ICProject, ILanguage} -import org.eclipse.cdt.core.parser.{DefaultLogService, ScannerInfo} -import org.eclipse.cdt.core.parser.FileContent +import org.eclipse.cdt.core.parser.{DefaultLogService, ExtendedScannerInfo, FileContent} import org.eclipse.cdt.internal.core.dom.parser.cpp.semantics.CPPVisitor import org.eclipse.cdt.internal.core.index.EmptyCIndex import org.slf4j.LoggerFactory @@ -72,11 +71,16 @@ class CdtParser(config: Config) extends ParseProblemsLogger with PreprocessorSta } } - private def createScannerInfo(file: Path): ScannerInfo 
= { + private def createScannerInfo(file: Path): ExtendedScannerInfo = { val additionalIncludes = if (FileDefaults.isCPPFile(file.toString)) parserConfig.systemIncludePathsCPP else parserConfig.systemIncludePathsC - new ScannerInfo(definedSymbols, (includePaths ++ additionalIncludes).map(_.toString).toArray) + new ExtendedScannerInfo( + definedSymbols, + (includePaths ++ additionalIncludes).map(_.toString).toArray, + parserConfig.macroFiles.map(_.toString).toArray, + parserConfig.includeFiles.map(_.toString).toArray + ) } private def parseInternal(file: Path): ParseResult = { @@ -130,7 +134,6 @@ class CdtParser(config: Config) extends ParseProblemsLogger with PreprocessorSta val parseResult = parseInternal(file) parseResult match { case ParseResult(Some(t), c, p, _) => - logger.debug(s"Parsed '${t.getFilePath}' ($c preprocessor error(s), $p problems)") Option(t) case ParseResult(_, _, _, maybeThrowable) => logger.warn( diff --git a/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/parser/ParserConfig.scala b/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/parser/ParserConfig.scala index 87e420f..f25ce89 100644 --- a/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/parser/ParserConfig.scala +++ b/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/parser/ParserConfig.scala @@ -8,9 +8,19 @@ import java.nio.file.{Path, Paths} object ParserConfig { def empty: ParserConfig = - ParserConfig(Set.empty, Set.empty, Set.empty, Map.empty, logProblems = false, logPreprocessor = false) + ParserConfig( + Set.empty, + Set.empty, + Set.empty, + Set.empty, + Map.empty, + Set.empty, + logProblems = false, + logPreprocessor = false + ) def fromConfig(config: Config): ParserConfig = ParserConfig( + config.includeFiles.map(Paths.get(_).toAbsolutePath), config.includePaths.map(Paths.get(_).toAbsolutePath), IncludeAutoDiscovery.discoverIncludePathsC(config), IncludeAutoDiscovery.discoverIncludePathsCPP(config), @@ -20,6 +30,7 @@ object ParserConfig { 
s.head -> s(1)
       case define => define -> "true"
     }.toMap ++ DefaultDefines.DEFAULT_CALL_CONVENTIONS,
+    config.macroFiles.map(Paths.get(_).toAbsolutePath),
     config.logProblems,
     config.logPreprocessor
   )
@@ -27,10 +38,12 @@ object ParserConfig {
 }
 
 case class ParserConfig(
+  includeFiles: Set[Path],
   userIncludePaths: Set[Path],
   systemIncludePathsC: Set[Path],
   systemIncludePathsCPP: Set[Path],
   definedSymbols: Map[String, String],
+  macroFiles: Set[Path],
   logProblems: Boolean,
   logPreprocessor: Boolean
 )
diff --git a/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/passes/AstCreationPass.scala b/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/passes/AstCreationPass.scala
index ad8f1eb..295bdd4 100644
--- a/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/passes/AstCreationPass.scala
+++ b/platform/frontends/c2cpg/src/main/scala/io/appthreat/c2cpg/passes/AstCreationPass.scala
@@ -38,22 +38,21 @@ class AstCreationPass(cpg: Cpg, config: Config, report: Report = new Report())
   override def runOnPart(diffGraph: DiffGraphBuilder, filename: String): Unit = {
     val path = Paths.get(filename).toAbsolutePath
     val relPath = SourceFiles.toRelativePath(path.toString, config.inputPath)
-    val fileLOC = io.shiftleft.utils.IOUtils.readLinesInFile(path).size
-    val (gotCpg, duration) = TimeUtils.time {
+    try {
       val parseResult = parser.parse(path)
       parseResult match {
         case Some(translationUnit) =>
-          report.addReportInfo(relPath, fileLOC, parsed = true)
           val localDiff =
             new AstCreator(relPath, config, translationUnit, file2OffsetTable)(config.schemaValidation).createAst()
           diffGraph.absorb(localDiff)
-          true
         case None =>
-          report.addReportInfo(relPath, fileLOC)
-          false
       }
+    } catch {
+      // Best-effort per file: a failure on one translation unit must not abort the whole pass, but it is
+      // logged rather than dropped, and fatal JVM errors (OutOfMemoryError, interrupts, ...) still propagate.
+      case scala.util.control.NonFatal(e) =>
+        org.slf4j.LoggerFactory.getLogger(getClass).warn(s"Failed to create AST for '$relPath'", e)
     }
-    report.updateReport(relPath, gotCpg, duration)
   }
 
 }
diff --git a/platform/src/main/scala/io/appthreat/chencli/console/Predefined.scala b/platform/src/main/scala/io/appthreat/chencli/console/Predefined.scala
index 5809f01..a508f05 100644
---
a/platform/src/main/scala/io/appthreat/chencli/console/Predefined.scala +++ b/platform/src/main/scala/io/appthreat/chencli/console/Predefined.scala @@ -6,6 +6,7 @@ object Predefined { val shared: Seq[String] = Seq( + "import scala.collection.mutable.ListBuffer", "import _root_.io.appthreat.console._", "import _root_.io.appthreat.chencli.console.ChenConsole._", "import _root_.io.shiftleft.codepropertygraph.Cpg", @@ -20,7 +21,90 @@ object Predefined { "import overflowdb.traversal.{`package` => _, help => _, _}", "import scala.jdk.CollectionConverters._", "implicit val resolver: ICallResolver = NoResolve", - "implicit val finder: NodeExtensionFinder = DefaultNodeExtensionFinder" + "implicit val finder: NodeExtensionFinder = DefaultNodeExtensionFinder", + "import me.shadaj.scalapy.py", + "import me.shadaj.scalapy.py.SeqConverters", + "import py.PyQuote", + "import me.shadaj.scalapy.interpreter.CPythonInterpreter", + "implicit val pyGlobal: me.shadaj.scalapy.py.Dynamic.global.type = py.Dynamic.global", + """ + | + | def printDashes(count: Int) = { + | var tabStr = "+--- " + | var i = 0 + | while (i < count) { + | tabStr = "| " + tabStr + | i += 1 + | } + | tabStr + | } + | + | def callTree(callerFullName: String, tree: ListBuffer[String] = new ListBuffer[String](), depth: Int = 3)(implicit atom: Cpg): ListBuffer[String] = { + | var dashCount = 0 + | var lastCallerMethod = callerFullName + | var lastDashCount = 0 + | tree += callerFullName + | + | def findCallee(methodName: String, tree: ListBuffer[String]): ListBuffer[String] = { + | val calleeList = atom.method.fullNameExact(methodName).callee.whereNot(_.name(".* + | tree += s"${printDashes(dashCount)}${c.fullName}~~${c.location.filename}#${c.lineNumber.getOrElse(0)}" + | findCallee(c.fullName, tree) + | } + | } + | tree + | } + | + | findCallee(lastCallerMethod, tree) + | } + |""".stripMargin, + """ + |import overflowdb.formats.ExportResult + |import overflowdb.formats.graphml.GraphMLExporter + |import 
java.nio.file.{Path, Paths}
+      |
+      |case class MethodSubGraph(methodName: String, nodes: Set[Node]) {
+      |  def edges: Set[Edge] = {
+      |    for {
+      |      node <- nodes
+      |      edge <- node.bothE.asScala
+      |      if nodes.contains(edge.inNode) && nodes.contains(edge.outNode)
+      |    } yield edge
+      |  }
+      |}
+      |
+      |def plus(resultA: ExportResult, resultB: ExportResult): ExportResult = {
+      |  ExportResult(
+      |    nodeCount = resultA.nodeCount + resultB.nodeCount,
+      |    edgeCount = resultA.edgeCount + resultB.edgeCount,
+      |    files = resultA.files ++ resultB.files,
+      |    additionalInfo = resultA.additionalInfo
+      |  )
+      |}
+      |
+      |def splitByMethod(atom: Cpg): IterableOnce[MethodSubGraph] = {
+      |  atom.method.map { method =>
+      |    MethodSubGraph(methodName = method.name, nodes = method.ast.toSet)
+      |  }
+      |}
+      |
+      |def toGraphML(methodFullName: String, gmlFileName: String)(implicit atom: Cpg): ExportResult = {
+      |  // Export only the AST of the method(s) whose full name is exactly `methodFullName`, in a single
+      |  // runExport call; a name that matches nothing yields an empty node set instead of a failing reduce.
+      |  val nodes: Set[Node] = atom.method.fullNameExact(methodFullName).ast.toSet
+      |  val subGraph = MethodSubGraph(methodName = methodFullName, nodes = nodes)
+      |  GraphMLExporter.runExport(subGraph.nodes, subGraph.edges, Paths.get(gmlFileName))
+      |}
+      |""".stripMargin
   )
 
   val forInteractiveShell: Seq[String] = {
diff --git a/project/build.properties b/project/build.properties
index 3040987..2743082 100644
--- a/project/build.properties
+++ b/project/build.properties
@@ -1 +1 @@
-sbt.version=1.9.4
+sbt.version=1.9.6
diff --git a/project/plugins.sbt b/project/plugins.sbt
index f8faaca..b1d20f8 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -1,7 +1,7 @@
 addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.8.3")
 addSbtPlugin("com.github.sbt" % "sbt-native-packager" % "1.9.16")
 addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.6")
-addSbtPlugin("io.shiftleft" % "sbt-ci-release-early" % "2.0.19")
-addSbtPlugin("com.github.sbt" % "sbt-dynver" % "5.0.1")
 addSbtPlugin("com.codecommit" % "sbt-github-packages" % "0.5.2")
-addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.1")
+addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.3")
+
+libraryDependencies +=
"ai.kien" %% "python-native-libs" % "0.2.4" diff --git a/semanticcpg/build.sbt b/semanticcpg/build.sbt index e5db4a9..afd3309 100644 --- a/semanticcpg/build.sbt +++ b/semanticcpg/build.sbt @@ -4,6 +4,7 @@ libraryDependencies ++= Seq( "io.shiftleft" %% "codepropertygraph" % Versions.cpg, "com.michaelpollmeier" %% "scala-repl-pp" % Versions.scalaReplPP, "org.json4s" %% "json4s-native" % Versions.json4s, + "me.shadaj" %% "scalapy-core" % "0.5.2", "org.scalatest" %% "scalatest" % Versions.scalatest % Test ) diff --git a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/Steps.scala b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/Steps.scala index cbfcd42..3957fa7 100644 --- a/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/Steps.scala +++ b/semanticcpg/src/main/scala/io/shiftleft/semanticcpg/language/Steps.scala @@ -11,6 +11,11 @@ import java.util.List as JList import scala.collection.mutable import scala.jdk.CollectionConverters.* +import me.shadaj.scalapy.py +import me.shadaj.scalapy.py.SeqConverters +import py.PyQuote +import me.shadaj.scalapy.interpreter.CPythonInterpreter + /** Base class for our DSL These are the base steps available in all steps of the query language. There are no * constraints on the element types, unlike e.g. [[NodeSteps]] */ @@ -76,6 +81,9 @@ class Steps[A](val traversal: Iterator[A]) extends AnyVal { else write(results) } + private def pyJson = py.module("json") + @Doc(info = "execute traversal and convert the result to python object") + def toPy: me.shadaj.scalapy.py.Dynamic = pyJson.loads(toJson(false)) } object Steps {