From 2daeac83ff9a6a7bad423a63ed2148f733850e1b Mon Sep 17 00:00:00 2001 From: Xavier Pinho Date: Thu, 27 Jul 2023 14:20:29 +0100 Subject: [PATCH 1/2] RubyParser: only emit assignment-like identifiers after `DEF` --- .../io/joern/rubysrc2cpg/parser/RubyLexer.g4 | 11 ++- .../io/joern/rubysrc2cpg/parser/RubyParser.g4 | 2 +- .../rubysrc2cpg/astcreation/AstCreator.scala | 18 +--- .../parser/MethodDefinitionTests.scala | 99 ++++++++++++++++++- .../rubysrc2cpg/parser/RubyLexerTests.scala | 11 +++ 5 files changed, 117 insertions(+), 24 deletions(-) diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/antlr4/io/joern/rubysrc2cpg/parser/RubyLexer.g4 b/joern-cli/frontends/rubysrc2cpg/src/main/antlr4/io/joern/rubysrc2cpg/parser/RubyLexer.g4 index bd968f5d8189..e35d700bfc9c 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/antlr4/io/joern/rubysrc2cpg/parser/RubyLexer.g4 +++ b/joern-cli/frontends/rubysrc2cpg/src/main/antlr4/io/joern/rubysrc2cpg/parser/RubyLexer.g4 @@ -430,7 +430,7 @@ fragment LINE_TERMINATOR // -------------------------------------------------------- SYMBOL_LITERAL - : ':' SYMBOL_NAME + : ':' (SYMBOL_NAME | (CONSTANT_IDENTIFIER | LOCAL_VARIABLE_IDENTIFIER) '=') // This check exists to prevent issuing a SYMBOL_LITERAL in whitespace-free associations, e.g. // in `foo(x:y)`, so that `:y` is not a SYMBOL_LITERAL // or in `{:x=>1}`, so that `:x=` is not a SYMBOL_LITERAL @@ -444,7 +444,6 @@ fragment SYMBOL_NAME | CONSTANT_IDENTIFIER | LOCAL_VARIABLE_IDENTIFIER | METHOD_ONLY_IDENTIFIER - | ASSIGNMENT_LIKE_METHOD_IDENTIFIER | OPERATOR_METHOD_NAME | KEYWORD // NOTE: Even though we have PLUSAT and MINUSAT in OPERATOR_METHOD_NAME, the former @@ -483,8 +482,12 @@ fragment METHOD_ONLY_IDENTIFIER : (CONSTANT_IDENTIFIER | LOCAL_VARIABLE_IDENTIFIER) ('!' | '?') ; -fragment ASSIGNMENT_LIKE_METHOD_IDENTIFIER - : (CONSTANT_IDENTIFIER | LOCAL_VARIABLE_IDENTIFIER) '=' + +// Similarly to PLUSAT/MINUSAT, this should only occur after a DEF token. +// Otherwise, the assignment `x=nil` would be parsed as (ASSIGNMENT_LIKE_METHOD_IDENTIFIER, NIL) +// instead of the more appropriate (LOCAL_VARIABLE_IDENTIFIER, EQ, NIL). +ASSIGNMENT_LIKE_METHOD_IDENTIFIER + : (CONSTANT_IDENTIFIER | LOCAL_VARIABLE_IDENTIFIER) '=' {previousNonWsTokenTypeOrEOF() == DEF}? ; fragment IDENTIFIER_CHARACTER diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/antlr4/io/joern/rubysrc2cpg/parser/RubyParser.g4 b/joern-cli/frontends/rubysrc2cpg/src/main/antlr4/io/joern/rubysrc2cpg/parser/RubyParser.g4 index ae8554bbbf73..0ced55513d05 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/antlr4/io/joern/rubysrc2cpg/parser/RubyParser.g4 +++ b/joern-cli/frontends/rubysrc2cpg/src/main/antlr4/io/joern/rubysrc2cpg/parser/RubyParser.g4 @@ -315,7 +315,7 @@ definedMethodName ; assignmentLikeMethodIdentifier - : (CONSTANT_IDENTIFIER | LOCAL_VARIABLE_IDENTIFIER) EQ + : ASSIGNMENT_LIKE_METHOD_IDENTIFIER ; methodName diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala index c195e44f317d..d647534f9270 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala @@ -1035,23 +1035,7 @@ class AstCreator( } } def astForAssignmentLikeMethodIdentifierContext(ctx: AssignmentLikeMethodIdentifierContext): Seq[Ast] = { - if (ctx == null) return Seq(Ast()) - - val terminalNode = Option(ctx.LOCAL_VARIABLE_IDENTIFIER()) match - case Some(value) => value - case None => ctx.CONSTANT_IDENTIFIER() - - val methodName = terminalNode.getText + "=" - val callNode = NewCall() - .name(methodName) - .code(ctx.getText) - .methodFullName(methodName) - .signature("") - .dispatchType(DispatchTypes.STATIC_DISPATCH) - .typeFullName(Defines.Any) - .lineNumber(terminalNode.getSymbol().getLine()) - .columnNumber(terminalNode.getSymbol().getCharPositionInLine()) - Seq(callAst(callNode)) + Seq(callAst(callNode(ctx, ctx.getText, ctx.getText, ctx.getText, DispatchTypes.STATIC_DISPATCH, Some(""), Some(Defines.Any)))) } def astForDefinedMethodNameContext(ctx: DefinedMethodNameContext): Seq[Ast] = { diff --git a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/parser/MethodDefinitionTests.scala b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/parser/MethodDefinitionTests.scala index b7ac8e76129f..37b5900943f8 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/parser/MethodDefinitionTests.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/parser/MethodDefinitionTests.scala @@ -552,8 +552,7 @@ class MethodDefinitionTests extends RubyParserAbstractTest { | SimpleMethodNamePart | DefinedMethodName | AssignmentLikeMethodIdentifier - | foo2 - | = + | foo2= | MethodParameterPart | ( | Parameters @@ -569,6 +568,102 @@ class MethodDefinitionTests extends RubyParserAbstractTest { | Separator""".stripMargin } + + // This test makes sure that the `end` after `def foo2=` is not parsed as part of its definition, + // which could happen if `foo2=` was parsed as two separate tokens (LOCAL_VARIABLE_IDENTIFIER, EQ) + // instead of just ASSIGNMENT_LIKE_METHOD_IDENTIFIER. + // Issue report: https://github.com/joernio/joern/issues/3270 + "its name ends in `=` and the next keyword is `end`" in { + val code = + """module SomeModule + |def foo1 + | return unless true + |end + |def foo2=(arg) + |end + |end + |""".stripMargin + printAst(_.compoundStatement(), code) shouldEqual + """CompoundStatement + | Statements + | ExpressionOrCommandStatement + | ExpressionExpressionOrCommand + | PrimaryExpression + | ModuleDefinitionPrimary + | ModuleDefinition + | module + | WsOrNl + | ClassOrModuleReference + | SomeModule + | WsOrNl + | BodyStatement + | CompoundStatement + | Statements + | ExpressionOrCommandStatement + | ExpressionExpressionOrCommand + | PrimaryExpression + | MethodDefinitionPrimary + | MethodDefinition + | def + | WsOrNl + | SimpleMethodNamePart + | DefinedMethodName + | MethodName + | MethodIdentifier + | foo1 + | MethodParameterPart + | Separator + | WsOrNl + | BodyStatement + | CompoundStatement + | Statements + | ModifierStatement + | ExpressionOrCommandStatement + | InvocationExpressionOrCommand + | ReturnArgsInvocationWithoutParentheses + | return + | unless + | WsOrNl + | ExpressionOrCommandStatement + | ExpressionExpressionOrCommand + | PrimaryExpression + | VariableReferencePrimary + | PseudoVariableIdentifierVariableReference + | TruePseudoVariableIdentifier + | true + | Separators + | Separator + | end + | Separators + | Separator + | ExpressionOrCommandStatement + | ExpressionExpressionOrCommand + | PrimaryExpression + | MethodDefinitionPrimary + | MethodDefinition + | def + | WsOrNl + | SimpleMethodNamePart + | DefinedMethodName + | AssignmentLikeMethodIdentifier + | foo2= + | MethodParameterPart + | ( + | Parameters + | Parameter + | MandatoryParameter + | arg + | ) + | Separator + | BodyStatement + | CompoundStatement + | end + | Separators + | Separator + | end + | Separators + | Separator""".stripMargin + } } } diff --git a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/parser/RubyLexerTests.scala b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/parser/RubyLexerTests.scala index 512775f5c7b4..31d9505eba7e 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/parser/RubyLexerTests.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/parser/RubyLexerTests.scala @@ -122,6 +122,11 @@ class RubyLexerTests extends AnyFlatSpec with Matchers { val eg = Seq(":^", ":==", ":[]", ":[]=", ":+", ":%", ":**", ":>>", ":+@") all(eg.map(tokenize)) shouldBe Seq(SYMBOL_LITERAL, EOF) } + + "Assignment-like-named symbols" should "be recognized as such" in { + val eg = Seq(":X=", ":xyz=") + all(eg.map(tokenize)) shouldBe Seq(SYMBOL_LITERAL, EOF) + } "Local variable identifiers" should "be recognized as such" in { val eg = Seq("i", "x1", "old_value", "_internal", "_while") @@ -662,4 +667,10 @@ class RubyLexerTests extends AnyFlatSpec with Matchers { val eg = Seq("$0", "$10", "$2", "$3") all(eg.map(tokenize)) shouldBe Seq(GLOBAL_VARIABLE_IDENTIFIER, EOF) } + + "Assignment-like method identifiers" should "be recognized as such" in { + val eg = Seq("def x=", "def X=") + all(eg.map(tokenize)) shouldBe Seq(DEF, WS, ASSIGNMENT_LIKE_METHOD_IDENTIFIER, EOF) + } + } From 4ce76d39300855f88da348864d33be86873972de Mon Sep 17 00:00:00 2001 From: Xavier Pinho Date: Thu, 27 Jul 2023 14:21:30 +0100 Subject: [PATCH 2/2] scalafmt --- .../scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala | 6 +++++- .../io/joern/rubysrc2cpg/parser/MethodDefinitionTests.scala | 2 +- .../scala/io/joern/rubysrc2cpg/parser/RubyLexerTests.scala | 6 +++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala index d647534f9270..4b8efbf7ba61 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/main/scala/io/joern/rubysrc2cpg/astcreation/AstCreator.scala @@ -1035,7 +1035,11 @@ class AstCreator( } } def astForAssignmentLikeMethodIdentifierContext(ctx: AssignmentLikeMethodIdentifierContext): Seq[Ast] = { - Seq(callAst(callNode(ctx, ctx.getText, ctx.getText, ctx.getText, DispatchTypes.STATIC_DISPATCH, Some(""), Some(Defines.Any)))) + Seq( + callAst( + callNode(ctx, ctx.getText, ctx.getText, ctx.getText, DispatchTypes.STATIC_DISPATCH, Some(""), Some(Defines.Any)) + ) + ) } def astForDefinedMethodNameContext(ctx: DefinedMethodNameContext): Seq[Ast] = { diff --git a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/parser/MethodDefinitionTests.scala b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/parser/MethodDefinitionTests.scala index 37b5900943f8..a12d0acafcf8 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/parser/MethodDefinitionTests.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/parser/MethodDefinitionTests.scala @@ -568,7 +568,7 @@ class MethodDefinitionTests extends RubyParserAbstractTest { | Separator""".stripMargin } - + // This test makes sure that the `end` after `def foo2=` is not parsed as part of its definition, // which could happen if `foo2=` was parsed as two separate tokens (LOCAL_VARIABLE_IDENTIFIER, EQ) // instead of just ASSIGNMENT_LIKE_METHOD_IDENTIFIER. diff --git a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/parser/RubyLexerTests.scala b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/parser/RubyLexerTests.scala index 31d9505eba7e..82d22328048b 100644 --- a/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/parser/RubyLexerTests.scala +++ b/joern-cli/frontends/rubysrc2cpg/src/test/scala/io/joern/rubysrc2cpg/parser/RubyLexerTests.scala @@ -122,7 +122,7 @@ class RubyLexerTests extends AnyFlatSpec with Matchers { val eg = Seq(":^", ":==", ":[]", ":[]=", ":+", ":%", ":**", ":>>", ":+@") all(eg.map(tokenize)) shouldBe Seq(SYMBOL_LITERAL, EOF) } - + "Assignment-like-named symbols" should "be recognized as such" in { val eg = Seq(":X=", ":xyz=") all(eg.map(tokenize)) shouldBe Seq(SYMBOL_LITERAL, EOF) @@ -667,10 +667,10 @@ class RubyLexerTests extends AnyFlatSpec with Matchers { val eg = Seq("$0", "$10", "$2", "$3") all(eg.map(tokenize)) shouldBe Seq(GLOBAL_VARIABLE_IDENTIFIER, EOF) } - + "Assignment-like method identifiers" should "be recognized as such" in { val eg = Seq("def x=", "def X=") all(eg.map(tokenize)) shouldBe Seq(DEF, WS, ASSIGNMENT_LIKE_METHOD_IDENTIFIER, EOF) } - + }