Skip to content

Commit

Permalink
[php2cpg] Improve performance. (#4867)
Browse files Browse the repository at this point in the history
* [php2cpg] Improve performance.

This php parser we use seems to have some warmup phase in which it
has very poor performance which makes it very expensive to feed
it single files like we so far did.
This change invokes the php parser with groups of 20 files which on my
machine give ~40% performance increase for the total frontend runtime on
large projects.
Bigger groups of files strangely only show marginal performance
increases despite the fact that profiling the frontend indicates that
there is still a lot of time wasted by the large amount of individual
parser invocations.
For now we keep it like this because for bigger groups we would run into
argument length limitations which are especially a pain on systems
like Windows. Sadly the php parser does not provide means to specify
the to be parsed files other than as a list on the command line.
So there would need to be some change there first before we could
increase the groups size in a meaningful way.
  • Loading branch information
ml86 authored Aug 22, 2024
1 parent b341067 commit 5f89dfe
Show file tree
Hide file tree
Showing 4 changed files with 181 additions and 79 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,27 @@ import io.shiftleft.semanticcpg.language.types.structure.NamespaceTraversal
import org.slf4j.LoggerFactory

import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Path}
import scala.collection.mutable

class AstCreator(filename: String, phpAst: PhpFile, fileContent: Option[String], disableFileContent: Boolean)(implicit
class AstCreator(relativeFileName: String, fileName: String, phpAst: PhpFile, disableFileContent: Boolean)(implicit
withSchemaValidation: ValidationMode
) extends AstCreatorBase(filename)
) extends AstCreatorBase(relativeFileName)
with AstNodeBuilder[PhpNode, AstCreator] {

private val logger = LoggerFactory.getLogger(AstCreator.getClass)
private val scope = new Scope()(() => nextClosureName())
private val tmpKeyPool = new IntervalKeyPool(first = 0, last = Long.MaxValue)
private val globalNamespace = globalNamespaceBlock()
private var fileContent = Option.empty[String]

private def getNewTmpName(prefix: String = "tmp"): String = s"$prefix${tmpKeyPool.next.toString}"

override def createAst(): DiffGraphBuilder = {
if (!disableFileContent) {
fileContent = Some(Files.readString(Path.of(fileName)))
}

val ast = astForPhpFile(phpAst)
storeInDiffGraph(ast, diffGraph)
diffGraph
Expand All @@ -51,7 +57,7 @@ class AstCreator(filename: String, phpAst: PhpFile, fileContent: Option[String],
file,
globalNamespace.name,
globalNamespace.fullName,
filename,
relativeFileName,
globalNamespace.code,
NodeTypes.NAMESPACE_BLOCK,
globalNamespace.fullName
Expand All @@ -75,7 +81,7 @@ class AstCreator(filename: String, phpAst: PhpFile, fileContent: Option[String],
}

private def astForPhpFile(file: PhpFile): Ast = {
val fileNode = NewFile().name(filename)
val fileNode = NewFile().name(relativeFileName)
fileContent.foreach(fileNode.content(_))

scope.pushNewScope(globalNamespace)
Expand Down Expand Up @@ -135,7 +141,7 @@ class AstCreator(filename: String, phpAst: PhpFile, fileContent: Option[String],
case enumCase: PhpEnumCaseStmt => astForEnumCase(enumCase) :: Nil
case staticStmt: PhpStaticStmt => astsForStaticStmt(staticStmt)
case unhandled =>
logger.error(s"Unhandled stmt $unhandled in $filename")
logger.error(s"Unhandled stmt $unhandled in $relativeFileName")
???
}
}
Expand Down Expand Up @@ -231,7 +237,7 @@ class AstCreator(filename: String, phpAst: PhpFile, fileContent: Option[String],
}
val methodCode = s"${modifierString}function $methodName(${parameters.map(_.rootCodeOrEmpty).mkString(",")})"

val method = methodNode(decl, methodName, methodCode, fullName, Some(signature), filename)
val method = methodNode(decl, methodName, methodCode, fullName, Some(signature), relativeFileName)

scope.pushNewScope(method)

Expand Down Expand Up @@ -477,7 +483,7 @@ class AstCreator(filename: String, phpAst: PhpFile, fileContent: Option[String],

private def astForNamespaceStmt(stmt: PhpNamespaceStmt): Ast = {
val name = stmt.name.map(_.name).getOrElse(NameConstants.Unknown)
val fullName = s"$filename:$name"
val fullName = s"$relativeFileName:$name"

val namespaceBlock = NewNamespaceBlock()
.name(name)
Expand Down Expand Up @@ -615,7 +621,7 @@ class AstCreator(filename: String, phpAst: PhpFile, fileContent: Option[String],
val itemUpdateAst = itemInitAst.root match {
case Some(initRoot: AstNodeNew) => itemInitAst.subTreeCopy(initRoot)
case _ =>
logger.warn(s"Could not copy foreach init ast in $filename")
logger.warn(s"Could not copy foreach init ast in $relativeFileName")
Ast()
}

Expand Down Expand Up @@ -746,7 +752,7 @@ class AstCreator(filename: String, phpAst: PhpFile, fileContent: Option[String],
Ast(local) :: assignmentAst.toList

case other =>
logger.warn(s"Unexpected static variable type $other in $filename")
logger.warn(s"Unexpected static variable type $other in $relativeFileName")
Nil
}
}
Expand Down Expand Up @@ -783,7 +789,7 @@ class AstCreator(filename: String, phpAst: PhpFile, fileContent: Option[String],
prependNamespacePrefix(name.name)
}

val typeDecl = typeDeclNode(stmt, name.name, fullName, filename, code, inherits = inheritsFrom)
val typeDecl = typeDeclNode(stmt, name.name, fullName, relativeFileName, code, inherits = inheritsFrom)
val createDefaultConstructor = stmt.hasConstructor

scope.pushNewScope(typeDecl)
Expand All @@ -802,7 +808,8 @@ class AstCreator(filename: String, phpAst: PhpFile, fileContent: Option[String],
case inits =>
val signature = s"${TypeConstants.Void}()"
val fullName = composeMethodFullName(StaticInitMethodName, isStatic = true)
val ast = staticInitMethodAst(inits, fullName, Option(signature), TypeConstants.Void, fileName = Some(filename))
val ast =
staticInitMethodAst(inits, fullName, Option(signature), TypeConstants.Void, fileName = Some(relativeFileName))
Option(ast)
}

Expand Down Expand Up @@ -867,7 +874,7 @@ class AstCreator(filename: String, phpAst: PhpFile, fileContent: Option[String],

val thisParam = thisParamAstForMethod(originNode)

val method = methodNode(originNode, ConstructorMethodName, fullName, fullName, Some(signature), filename)
val method = methodNode(originNode, ConstructorMethodName, fullName, fullName, Some(signature), relativeFileName)

val methodBody = blockAst(blockNode(originNode), scope.getFieldInits)

Expand Down Expand Up @@ -1117,7 +1124,7 @@ class AstCreator(filename: String, phpAst: PhpFile, fileContent: Option[String],
.sortBy(_.argumentIndex)

if (args.size != 2) {
val position = s"${line(assignment).getOrElse("")}:$filename"
val position = s"${line(assignment).getOrElse("")}:$relativeFileName"
logger.warn(s"Expected 2 call args for emptyArrayDimAssign. Not resetting code: $position")
} else {
val codeOverride = s"${args.head.code}[] = ${args.last.code}"
Expand Down Expand Up @@ -1561,14 +1568,14 @@ class AstCreator(filename: String, phpAst: PhpFile, fileContent: Option[String],
Some(localNode(closureExpr, name, s"$byRefPrefix$$$name", typeFullName))

case other =>
logger.warn(s"Found incorrect closure use variable '$other' in $filename")
logger.warn(s"Found incorrect closure use variable '$other' in $relativeFileName")
None
}
}

// Add closure bindings to diffgraph
localsForUses.foreach { local =>
val closureBindingId = s"$filename:$methodName:${local.name}"
val closureBindingId = s"$relativeFileName:$methodName:${local.name}"
local.closureBindingId(closureBindingId)
scope.addToScope(local.name, local)

Expand Down Expand Up @@ -1749,7 +1756,7 @@ class AstCreator(filename: String, phpAst: PhpFile, fileContent: Option[String],
callAst(accessNode, variableAst :: dimensionAst :: Nil)

case None =>
val errorPosition = s"$variableCode:${line(expr).getOrElse("")}:$filename"
val errorPosition = s"$variableCode:${line(expr).getOrElse("")}:$relativeFileName"
logger.error(s"ArrayDimFetchExpr without dimensions should be handled in assignment: $errorPosition")
Ast()
}
Expand Down Expand Up @@ -1823,7 +1830,7 @@ class AstCreator(filename: String, phpAst: PhpFile, fileContent: Option[String],
.getOrElse(nameExpr.name)

case expr =>
logger.warn(s"Unexpected expression as class name in <class>::class expression: $filename")
logger.warn(s"Unexpected expression as class name in <class>::class expression: $relativeFileName")
NameConstants.Unknown
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,64 +6,125 @@ import io.joern.php2cpg.parser.Domain.PhpFile
import io.joern.x2cpg.utils.ExternalCommand
import org.slf4j.LoggerFactory

import java.nio.file.Path
import java.nio.file.Paths
import java.util.regex.Pattern
import scala.collection.mutable
import scala.io.Source
import scala.util.{Failure, Success, Try}

class PhpParser private (phpParserPath: String, phpIniPath: String, disableFileContent: Boolean) {

private val logger = LoggerFactory.getLogger(this.getClass)

private def phpParseCommand(filename: String): String = {
private def phpParseCommand(filenames: collection.Seq[String]): String = {
val phpParserCommands = "--with-recovery --resolve-names --json-dump"
s"php --php-ini $phpIniPath $phpParserPath $phpParserCommands $filename"
val filenamesString = filenames.mkString(" ")
s"php --php-ini $phpIniPath $phpParserPath $phpParserCommands $filenamesString"
}

def parseFile(inputPath: String): Option[(PhpFile, Option[String])] = {
val inputFile = File(inputPath)
val inputFilePath = inputFile.canonicalPath
val inputDirectory = inputFile.parent.canonicalPath
val command = phpParseCommand(inputFilePath)
ExternalCommand.run(command, inputDirectory) match {
case Success(output) =>
val content = Option.unless(disableFileContent)(inputFile.contentAsString)
processParserOutput(output, inputFilePath).map((_, content))
case Failure(exception) =>
logger.error(s"Failure running php-parser with $command", exception.getMessage)
None
def parseFiles(inputPaths: collection.Seq[String]): collection.Seq[(String, Option[PhpFile], String)] = {
// We need to keep a map between the input path and its canonical representation in
// order to map back the canonical file name we get from the php parser.
// Otherwise later on file name/path processing might get confused because the returned
// file paths are in no relation to the input paths.
val canonicalToInputPath = mutable.HashMap.empty[String, String]

inputPaths.foreach { inputPath =>
val canonicalPath = Path.of(inputPath).toFile.getCanonicalPath
canonicalToInputPath.put(canonicalPath, inputPath)
}
}

private def processParserOutput(output: Seq[String], filename: String): Option[PhpFile] = {
val maybeJson = linesToJsonValue(output, filename)
maybeJson.flatMap(jsonValueToPhpFile(_, filename))
val command = phpParseCommand(inputPaths)

val (returnValue, output) = ExternalCommand.runWithMergeStdoutAndStderr(command, ".")
returnValue match {
case 0 =>
val asJson = linesToJsonValues(output.lines().toArray(size => new Array[String](size)))
val asPhpFile = asJson.map { case (filename, jsonObjectOption, infoLines) =>
(filename, jsonToPhpFile(jsonObjectOption, filename), infoLines)
}
val withRemappedFileName = asPhpFile.map { case (filename, phpFileOption, infoLines) =>
(canonicalToInputPath.apply(filename), phpFileOption, infoLines)
}
withRemappedFileName
case exitCode =>
logger.error(s"Failure running php-parser with $command, exit code $exitCode")
Nil
}
}

private def linesToJsonValue(lines: Seq[String], filename: String): Option[ujson.Value] = {
if (lines.exists(_.startsWith("["))) {
val jsonString = lines.dropWhile(_.charAt(0) != '[').mkString
Try(Option(ujson.read(jsonString))) match {
case Success(Some(value)) => Some(value)
case Success(None) =>
logger.error(s"Parsing json string for $filename resulted in null return value")
None
case Failure(exception) =>
logger.error(s"Parsing json string for $filename failed with exception", exception)
private def jsonToPhpFile(jsonObject: Option[ujson.Value], filename: String): Option[PhpFile] = {
val phpFile = jsonObject.flatMap { jsonObject =>
Try(Domain.fromJson(jsonObject)) match {
case Success(phpFile) =>
Some(phpFile)
case Failure(e) =>
logger.error(s"Failed to generate intermediate AST for $filename", e)
None
}
} else {
logger.warn(s"No JSON output for $filename")
None
}
phpFile
}

private def jsonValueToPhpFile(json: ujson.Value, filename: String): Option[PhpFile] = {
Try(Domain.fromJson(json)) match {
case Success(phpFile) => Some(phpFile)
case Failure(e) =>
logger.error(s"Failed to generate intermediate AST for $filename", e)
None
enum PARSE_MODE {
case PARSE_INFO, PARSE_JSON, SKIP_TRAILER
}

private def linesToJsonValues(
lines: collection.Seq[String]
): collection.Seq[(String, Option[ujson.Value], String)] = {
val filePrefix = "====> File "
val filenameRegex = Pattern.compile(s"$filePrefix(.*):")
val result = mutable.ArrayBuffer.empty[(String, Option[ujson.Value], String)]

var filename = ""
val infoLines = mutable.ArrayBuffer.empty[String]
val jsonLines = mutable.ArrayBuffer.empty[String]

var mode = PARSE_MODE.SKIP_TRAILER
val linesIt = lines.iterator
while (linesIt.hasNext) {
val line = linesIt.next
mode match {
case PARSE_MODE.PARSE_INFO =>
if (line != "==> JSON dump:") {
infoLines.append(line)
} else {
mode = PARSE_MODE.PARSE_JSON
}
case PARSE_MODE.PARSE_JSON =>
jsonLines.append(line)
if (line.startsWith("]") || line == "[]") {
val jsonString = jsonLines.mkString

Try(Option(ujson.read(jsonString))) match {
case Success(option) =>
result.append((filename, option, infoLines.mkString))
if (option.isEmpty) {
logger.error(s"Parsing json string for $filename resulted in null return value")
}
case Failure(exception) =>
result.append((filename, None, infoLines.mkString))
logger.error(s"Parsing json string for $filename failed with exception", exception)
}

mode = PARSE_MODE.SKIP_TRAILER
}
case _ =>
}

if (line.startsWith(filePrefix)) {
val matcher = filenameRegex.matcher(line)
if (matcher.find()) {
filename = matcher.group(1)
infoLines.clear()
jsonLines.clear()
mode = PARSE_MODE.PARSE_INFO
}
}
}
result
}
}

Expand Down
Loading

0 comments on commit 5f89dfe

Please sign in to comment.