From 18038689c10e96ea54c5034f9a0b6049ab10c2ce Mon Sep 17 00:00:00 2001 From: David Baker Effendi Date: Tue, 28 May 2024 16:02:52 +0200 Subject: [PATCH] [ruby] Unhandled Receiver Types (#4608) This PR handles call receivers that were previously unhandled; these caused issues later during dynamic call linking because the receiver would be an `Unknown` node. * Handled quoted expanded regex literals * Handled constant variable references Additionally, this brings back warnings for `Unknown` nodes, as these unhandled nodes are not syntax errors and do not necessarily trigger an explicit warning elsewhere. --- .../AstForExpressionsCreator.scala | 5 +---- .../parser/AntlrContextHelpers.scala | 18 ++++++++++++++++- .../rubysrc2cpg/parser/RubyNodeCreator.scala | 18 ++++++++++++++++- .../io/joern/rubysrc2cpg/passes/Defines.scala | 1 + .../rubysrc2cpg/querying/CallTests.scala | 20 ++++++++++++++++--- 5 files changed, 53 insertions(+), 9 deletions(-) diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForExpressionsCreator.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForExpressionsCreator.scala index f7a61876cef9..cd4a7b442015 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForExpressionsCreator.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstForExpressionsCreator.scala @@ -641,10 +641,7 @@ trait AstForExpressionsCreator(implicit withSchemaValidation: ValidationMode) { protected def astForUnknown(node: RubyNode): Ast = { val className = node.getClass.getSimpleName val text = code(node) - node match { - case _: Unknown => // Unknowns are syntax errors which are logged by the parser already - case _ => logger.warn(s"Could not represent expression: $text ($className) ($relativeFileName), skipping") - } + logger.warn(s"Could not represent expression: $text ($className) ($relativeFileName), skipping") 
Ast(unknownNode(node, text)) } diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/parser/AntlrContextHelpers.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/parser/AntlrContextHelpers.scala index fcf52523b688..eb2e0d9ecadc 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/parser/AntlrContextHelpers.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/parser/AntlrContextHelpers.scala @@ -92,7 +92,7 @@ object AntlrContextHelpers { sealed implicit class RegularExpressionLiteralContextHelper(ctx: RegularExpressionLiteralContext) { def isStatic: Boolean = !isDynamic - def isDynamic: Boolean = ctx.regexpLiteralContent.asScala.exists(c => Option(c.compoundStatement()).isDefined) + def isDynamic: Boolean = interpolations.nonEmpty def interpolations: List[ParserRuleContext] = ctx .regexpLiteralContent() @@ -102,6 +102,22 @@ object AntlrContextHelpers { .toList } + sealed implicit class QuotedExpandedRegularExpressionLiteralContextHelper( + ctx: QuotedExpandedRegularExpressionLiteralContext + ) { + + def isStatic: Boolean = !isDynamic + def isDynamic: Boolean = interpolations.nonEmpty + + def interpolations: List[ParserRuleContext] = ctx + .quotedExpandedLiteralStringContent() + .asScala + .filter(ctx => Option(ctx.compoundStatement()).isDefined) + .map(ctx => ctx.compoundStatement()) + .toList + + } + sealed implicit class CurlyBracesBlockContextHelper(ctx: CurlyBracesBlockContext) { def parameters: List[ParserRuleContext] = Option(ctx.blockParameter()).map(_.parameters).getOrElse(List()) } diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/parser/RubyNodeCreator.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/parser/RubyNodeCreator.scala index 0a7424501a1b..eff4cd63e8c8 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/parser/RubyNodeCreator.scala +++ 
b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/parser/RubyNodeCreator.scala @@ -2,7 +2,7 @@ package io.joern.rubysrc2cpg.parser import io.joern.rubysrc2cpg.astcreation.RubyIntermediateAst.* import io.joern.rubysrc2cpg.parser.AntlrContextHelpers.* -import io.joern.rubysrc2cpg.parser.RubyParser.CommandWithDoBlockContext +import io.joern.rubysrc2cpg.parser.RubyParser.{CommandWithDoBlockContext, ConstantVariableReferenceContext} import io.joern.rubysrc2cpg.passes.Defines import io.joern.rubysrc2cpg.passes.Defines.getBuiltInType import io.joern.rubysrc2cpg.utils.FreshNameGenerator @@ -373,6 +373,16 @@ class RubyNodeCreator extends RubyParserBaseVisitor[RubyNode] { } } + override def visitQuotedExpandedRegularExpressionLiteral( + ctx: RubyParser.QuotedExpandedRegularExpressionLiteralContext + ): RubyNode = { + if (ctx.isStatic) { + StaticLiteral(getBuiltInType(Defines.Regexp))(ctx.toTextSpan) + } else { + DynamicLiteral(getBuiltInType(Defines.Regexp), ctx.interpolations.map(visit))(ctx.toTextSpan) + } + } + override def visitCurlyBracesBlock(ctx: RubyParser.CurlyBracesBlockContext): RubyNode = { val parameters = Option(ctx.blockParameter()).fold(List())(_.parameters).map(visit) val body = visit(ctx.compoundStatement()) @@ -735,6 +745,12 @@ class RubyNodeCreator extends RubyParserBaseVisitor[RubyNode] { Unknown()(ctx.toTextSpan) } + override def visitConstantVariableReference(ctx: ConstantVariableReferenceContext): RubyNode = { + MemberAccess(SelfIdentifier()(ctx.toTextSpan.spanStart(Defines.Self)), "::", ctx.CONSTANT_IDENTIFIER().getText)( + ctx.toTextSpan + ) + } + override def visitIndexingAccessExpression(ctx: RubyParser.IndexingAccessExpressionContext): RubyNode = { IndexAccess( visit(ctx.primaryValue()), diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/passes/Defines.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/passes/Defines.scala index fa0882c8d4f1..81dff11e9f82 100644 --- 
a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/passes/Defines.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/passes/Defines.scala @@ -23,6 +23,7 @@ object Defines { val Proc: String = "proc" val This: String = "this" val Loop: String = "loop" + val Self: String = "self" val Program: String = ":program" diff --git a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/CallTests.scala b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/CallTests.scala index fc68712a0a1c..f7a81941d959 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/CallTests.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/querying/CallTests.scala @@ -3,7 +3,7 @@ package io.joern.rubysrc2cpg.querying import io.joern.rubysrc2cpg.passes.Defines.RubyOperators import io.joern.rubysrc2cpg.testfixtures.RubyCode2CpgFixture import io.joern.x2cpg.Defines -import io.shiftleft.codepropertygraph.generated.{DispatchTypes, Operators} +import io.shiftleft.codepropertygraph.generated.{DispatchTypes, NodeTypes, Operators} import io.shiftleft.codepropertygraph.generated.nodes.{Block, Call, Identifier, Literal} import io.shiftleft.semanticcpg.language.* @@ -198,11 +198,25 @@ class CallTests extends RubyCode2CpgFixture { } "named parameters in parenthesis-less call to a symbol value should create a correctly named argument" in { - val cpg = code("on in: :sequence") - + val cpg = code("on in: :sequence") val List(_, inArg) = cpg.call.argument.l: @unchecked inArg.code shouldBe ":sequence" inArg.argumentName shouldBe Option("in") } + "a call with a quoted regex literal should have a literal receiver" in { + val cpg = code("%r{^/}.freeze") + val List(regexLiteral: Literal) = cpg.call.nameExact("freeze").receiver.l: @unchecked + regexLiteral.typeFullName shouldBe "__builtin.Regexp" + regexLiteral.code shouldBe "%r{^/}" + } + + "a call with a double 
colon receiver" in { + val cpg = code("::Augeas.open { |aug| aug.get('/augeas/version') }") + val List(augeas: Call) = cpg.call.nameExact("open").receiver.l: @unchecked + // TODO: Right now this is seen as a "getter" but should _probably_ be a field access, e.g. self.Augeas + augeas.methodFullName shouldBe "Test0.rb:::program:Augeas" + augeas.code shouldBe "::Augeas" + } + }