Skip to content

Commit

Permalink
[ruby] Basic type recovery (#4598)
Browse files Browse the repository at this point in the history
* [ruby] Moved deprecated type pass to new frontend as is, working on adapting to new ruby frontend

* [gitignore] changed paths to use wildcards

* [x2cpg] Added OverloadableScope to ProgramSummary, implemented on C# and tested to be working

* [ruby] Added RubyStubbedType for type-stubs and dependencyDownload types

* [ruby] Removed import tests for now, as well as deprecated type prop pass from new frontend, TODO for next PR

* [ruby] Added info to test

* [ruby] review comments

* [ruby] WIP: Type recovery

* [ruby] type recovery on method returns and constructor initializations working

* [ruby] working on import resolver

* [ruby] type recovery for import resolution and external dependencies is working in basic cases

* [ruby] Working on type recovery tests

* [ruby] basic type recovery tests

* [ruby] parallel type recovery pass

* [ruby] commented out code removed

* [ruby] Change string literals to constant variables

* [ruby] changed to constants
  • Loading branch information
AndreiDreyer authored May 24, 2024
1 parent 5ddc0bf commit 0e2e8ef
Show file tree
Hide file tree
Showing 7 changed files with 536 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,15 @@ import io.joern.rubysrc2cpg.passes.{
ConfigFileCreationPass,
DependencyPass,
ImplicitRequirePass,
ImportsPass
ImportsPass,
RubyImportResolverPass,
RubyTypeHintCallLinker
}
import io.joern.rubysrc2cpg.utils.DependencyDownloader
import io.joern.x2cpg.X2Cpg.withNewEmptyCpg
import io.joern.x2cpg.passes.base.AstLinkerPass
import io.joern.x2cpg.passes.callgraph.NaiveCallLinker
import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass}
import io.joern.x2cpg.passes.frontend.{MetaDataPass, TypeNodePass, XTypeRecoveryConfig}
import io.joern.x2cpg.utils.{ConcurrentTaskUtil, ExternalCommand}
import io.joern.x2cpg.{SourceFiles, X2CpgFrontend}
import io.shiftleft.codepropertygraph.Cpg
Expand Down Expand Up @@ -160,7 +162,9 @@ object RubySrc2Cpg {
new AstLinkerPass(cpg)
)
} else {
List()
List(new RubyImportResolverPass(cpg)) ++
new passes.RubyTypeRecoveryPassGenerator(cpg, config = XTypeRecoveryConfig(iterations = 4))
.generate() ++ List(new RubyTypeHintCallLinker(cpg), new NaiveCallLinker(cpg), new AstLinkerPass(cpg))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,14 +113,26 @@ class RubyScope(summary: RubyProgramSummary, projectRoot: Option[String])
} else {
resolvedPath :: Nil
}
pathsToImport.flatMap(summary.pathToType.getOrElse(_, Set())) match {
case x if x.nonEmpty =>
x.foreach { ty => addImportedTypeOrModule(ty.name) }
case _ =>
addRequireGem(path)

pathsToImport.foreach { pathName =>
// Pull in type / module defs
summary.pathToType.getOrElse(pathName, Set()) match {
case x if x.nonEmpty =>
x.foreach { ty => addImportedTypeOrModule(ty.name) }
addImportedFunctions(pathName)
case _ =>
addRequireGem(path)
}
}
}

/** Brings into scope every known type whose name starts with `importName` and ends with the program marker
  * (`RDefines.Program`).
  *
  * @param importName
  *   the prefix that the names of the candidate types must start with.
  */
def addImportedFunctions(importName: String): Unit = {
  // A type qualifies when its name is rooted at the import and ends with the program marker
  def isProgramTypeOfImport(typeName: String): Boolean =
    typeName.startsWith(importName) && typeName.endsWith(RDefines.Program)

  val programTypes = summary.namespaceToType.values.flatten.collect {
    case ty if isProgramTypeOfImport(ty.name) => ty
  }
  typesInScope.addAll(programTypes)
}

/** Handles an include of the given type or module by delegating to `addImportedMember`.
  *
  * @param typeOrModule
  *   the name of the included type or module.
  */
def addInclude(typeOrModule: String): Unit = addImportedMember(typeOrModule)
Expand Down Expand Up @@ -267,6 +279,9 @@ class RubyScope(summary: RubyProgramSummary, projectRoot: Option[String])
Option(RubyType(s"${GlobalTypes.builtinPrefix}.$normalizedTypeName", List.empty, List.empty))
case None =>
summary.namespaceToType.flatMap(_._2).collectFirst {
case x if x.name.split("[.]").endsWith(normalizedTypeName.split("[.]")) =>
typesInScope.addOne(x)
x
case x if x.name.split("[.]").lastOption.contains(normalizedTypeName) =>
typesInScope.addOne(x)
x
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package io.joern.rubysrc2cpg.passes

import better.files.File
import io.joern.rubysrc2cpg.deprecated.utils.PackageTable
import io.joern.x2cpg.Defines as XDefines
import io.shiftleft.semanticcpg.language.importresolver.*
import io.joern.x2cpg.passes.frontend.XImportResolverPass
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.Call
import io.shiftleft.semanticcpg.language.*
import io.joern.rubysrc2cpg.passes.Defines as RDefines
import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal

import java.io.File as JFile
import java.util.regex.{Matcher, Pattern}
/** Import resolver pass for the Ruby frontend. For every import call it looks up the type declarations, namespace
  * blocks and program-level methods whose file matches the imported entity, and passes each of them to
  * `evaluatedImportToTag` together with the import call.
  */
class RubyImportResolverPass(cpg: Cpg) extends XImportResolverPass(cpg) {

  /** Matches a quoted path: word characters, `/` and `.` between single or double quotes. */
  private val pathPattern = Pattern.compile("[\"']([\\w/.]+)[\"']")

  override protected def optionalResolveImport(
    fileName: String,
    importCall: Call,
    importedEntity: String,
    importedAs: String,
    diffGraph: DiffGraphBuilder
  ): Unit = {
    resolveEntities(importedEntity, importCall, fileName).foreach(x => evaluatedImportToTag(x, importCall, diffGraph))
  }

  /** Collects everything the imported entity may refer to.
    *
    * @param expEntity
    *   the imported entity as written at the import site.
    * @param importCall
    *   the import call (currently unused by the lookup itself).
    * @param fileName
    *   the file containing the import call, used to resolve relative paths.
    * @return
    *   the resolved type declarations, modules and methods found in files matching the entity.
    */
  private def resolveEntities(expEntity: String, importCall: Call, fileName: String): Set[EvaluatedImport] = {
    // TODO: Currently only working on internal dependencies, will be fixed for external dependencies once the dependency linking is done
    val expResolvedPath =
      if (expEntity.contains("."))
        getResolvedPath(expEntity, fileName)
      // The entity is quoted so that regex metacharacters in it are matched literally (and cannot make the
      // pattern invalid); the dot of the extension is escaped for the same reason.
      else if (cpg.file.name(s".*${Pattern.quote(expEntity)}\\.rb").nonEmpty)
        getResolvedPath(s"$expEntity.rb", fileName)
      else
        expEntity

    // TODO Limited ResolvedMethod exposure for now, will open up after looking at more concrete examples
    val finalResolved = {
      val filePattern = s"${Pattern.quote(expResolvedPath)}\\.?.*"

      // Each type declaration is exposed both as a type and through its constructor, callable as `new`
      val resolvedTypeDecls = cpg.typeDecl
        .where(_.file.name(filePattern))
        .fullName
        .flatMap(fullName =>
          Seq(
            ResolvedTypeDecl(fullName),
            ResolvedMethod(s"$fullName.${XDefines.ConstructorMethodName}", "new", fullName.split("[.]").lastOption)
          )
        )
        .toSet

      val resolvedModules = cpg.namespaceBlock
        .whereNot(_.nameExact(NamespaceTraversal.globalNamespaceName))
        .where(_.file.name(filePattern))
        .flatMap(module => Seq(ResolvedTypeDecl(module.fullName)))
        .toSet

      // Expose methods which are directly present in a file, without any module, TypeDecl
      val resolvedMethods = cpg.method
        .where(_.file.name(filePattern))
        .where(_.nameExact(RDefines.Program))
        .astChildren
        .astChildren
        .isMethod
        .flatMap(method => Seq(ResolvedMethod(method.fullName, method.name)))
        .toSet
      resolvedTypeDecls ++ resolvedModules ++ resolvedMethods
    }.collectAll[EvaluatedImport].toSet

    finalResolved
  }

  /** Resolves an imported entity against the directory of the importing file and returns the result relative to the
    * code root.
    *
    * @param expEntity
    *   the imported entity; a leading `./` is dropped and, if it contains a quoted path, that path is used.
    * @param fileName
    *   the importing file, relative to the code root.
    * @return
    *   the resolved path with the code-root prefix removed; it keeps a `.rb` extension only if the entity had one.
    */
  def getResolvedPath(expEntity: String, fileName: String): String = {
    val rawEntity   = expEntity.stripPrefix("./")
    val matcher     = pathPattern.matcher(rawEntity)
    val sep         = Matcher.quoteReplacement(JFile.separator)
    val root        = s"$codeRootDir${JFile.separator}"
    val currentFile = s"$root$fileName"
    val entity      = if (matcher.find()) matcher.group(1) else rawEntity

    // Directory of the importing file: the full path minus its last path segment
    val currentDir = currentFile.stripSuffix(currentFile.split(sep).lastOption.getOrElse(""))

    // Only the trailing `.rb` is removed before resolution. Cutting at the first dot instead would reduce a
    // parent-relative entity such as `../lib/foo` to the empty string and resolve it to `currentDir` itself.
    val entityWithoutExtension = entity.stripSuffix(".rb")

    val resolvedPath = better.files.File(currentDir, entityWithoutExtension).pathAsString match {
      case resPath if entity.endsWith(".rb") => s"$resPath.rb"
      case resPath                           => resPath
    }
    resolvedPath.stripPrefix(root)
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package io.joern.rubysrc2cpg.passes

import io.joern.x2cpg.passes.frontend.XTypeHintCallLinker
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.Call
import io.shiftleft.semanticcpg.language.*

/** Type-hint call linker for the Ruby frontend. It works on the calls provided by the parent linker, minus those
  * whose name starts with `require`.
  */
class RubyTypeHintCallLinker(cpg: Cpg) extends XTypeHintCallLinker(cpg) {

  /** The parent's calls with every call named `require...` filtered out. */
  override def calls: Iterator[Call] = {
    val requireLikeName = "^(require).*"
    super.calls.nameNot(requireLikeName)
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package io.joern.rubysrc2cpg.passes

import io.joern.x2cpg.Defines as XDefines
import io.joern.x2cpg.passes.frontend.*
import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.codepropertygraph.generated.nodes.*
import io.shiftleft.semanticcpg.language.*
import overflowdb.BatchedUpdate.DiffGraphBuilder

/** Generator of the type recovery passes for the Ruby frontend, using [[File]] nodes as compilation units.
  *
  * @param cpg
  *   the graph to run type recovery on.
  * @param config
  *   the type recovery configuration; defaults to `XTypeRecoveryConfig()`.
  */
class RubyTypeRecoveryPassGenerator(cpg: Cpg, config: XTypeRecoveryConfig = XTypeRecoveryConfig())
    extends XTypeRecoveryPassGenerator[File](cpg, config) {

  /** Creates the [[RubyTypeRecovery]] pass for the given state and iteration. */
  override protected def generateRecoveryPass(state: XTypeRecoveryState, iteration: Int): XTypeRecovery[File] = {
    val recoveryPass = new RubyTypeRecovery(cpg, state, iteration)
    recoveryPass
  }
}

/** A single type recovery pass over the graph, creating one [[RecoverForRubyFile]] task per file node. */
private class RubyTypeRecovery(cpg: Cpg, state: XTypeRecoveryState, iteration: Int)
    extends XTypeRecovery[File](cpg, state, iteration) {

  /** Every file node of the graph is treated as a compilation unit. */
  override def compilationUnits: Iterator[File] = cpg.file.iterator

  /** Creates the recovery task for one file, sharing this pass's state. */
  override def generateRecoveryForCompilationUnitTask(
    unit: File,
    builder: DiffGraphBuilder
  ): RecoverForXCompilationUnit[File] =
    new RecoverForRubyFile(cpg = cpg, cu = unit, builder = builder, state = state)
}

/** Type recovery task for a single Ruby file. It customises the constructor heuristics, the "already typed" check
  * and the handling of resolved imports of the generic recovery task.
  */
private class RecoverForRubyFile(cpg: Cpg, cu: File, builder: DiffGraphBuilder, state: XTypeRecoveryState)
    extends RecoverForXCompilationUnit[File](cpg, cu, builder, state) {

  /** A heuristic method to determine if a call is a constructor or not: the call must have a constructor name and
    * its code must start with an upper-case character. A call with empty code is never a constructor.
    */
  override protected def isConstructor(c: Call): Boolean = {
    // headOption instead of charAt(0): empty code must yield false rather than throw
    isConstructor(c.name) && c.code.headOption.exists(_.isUpper)
  }

  /** A heuristic method to determine if a call name is a constructor or not.
    */
  override protected def isConstructor(name: String): Boolean =
    !name.isBlank && (name == "new" || name == XDefines.ConstructorMethodName)

  /** Decides whether a node already carries type information.
    *
    *   - Operator calls (method full name starting with `<operator>`) need a type full name other than `<empty>` in
    *     addition to the parent's check.
    *   - Other calls count as typed unless their method full name is, ignoring case, `<unknownfullname>` or `any`.
    *   - All remaining nodes use the parent's check.
    */
  override protected def hasTypes(node: AstNode): Boolean = node match {
    case call: Call if call.methodFullName.startsWith("<operator>") =>
      call.typeFullName != "<empty>" && super.hasTypes(node)
    case call: Call =>
      !call.methodFullName.toLowerCase().matches("(<unknownfullname>|any)")
    case _ => super.hasTypes(node)
  }

  /** Processes the tags on the import's call. A tag that evaluates to a resolved type declaration registers the last
    * dot-separated segment of the type's full name (falling back to the import alias) as a local variable of that
    * type in the symbol table; any other evaluated import is handled by the parent implementation.
    */
  override def visitImport(i: Import): Unit = for {
    resolvedImport <- i.call.tag
    alias          <- i.importedAs
  } {
    import io.shiftleft.semanticcpg.language.importresolver.*
    EvaluatedImport.tagToEvaluatedImport(resolvedImport).foreach {
      case ResolvedTypeDecl(fullName, _) =>
        symbolTable.append(LocalVar(fullName.split("\\.").lastOption.getOrElse(alias)), fullName)
      case _ => super.visitImport(i)
    }
  }

  /** Associates the identifier with its own type full name; the constructor call itself is not consulted. */
  override def visitIdentifierAssignedToConstructor(i: Identifier, c: Call): Set[String] = {
    associateTypes(i, Set(i.typeFullName))
  }
}
Loading

0 comments on commit 0e2e8ef

Please sign in to comment.