Skip to content

Commit

Permalink
RubyLexer: Generalized non-expanded delimited array (#3447)
Browse files (browse the repository at this point in the history)
  • Loading branch information
xavierpinho authored Aug 7, 2023
1 parent 232f923 commit 194e0f4
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 197 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ QUOTED_NON_EXPANDED_STRING_LITERAL_START
: '%q' {!Character.isAlphabetic(_input.LA(1))}?
{
pushNonExpandedDelimiter(_input.LA(1));
setNonExpandedDelimitedStringEndToken(QUOTED_NON_EXPANDED_STRING_LITERAL_END);
setNonExpandedDelimiterEndToken(QUOTED_NON_EXPANDED_STRING_LITERAL_END);
_input.consume();
pushMode(NON_EXPANDED_DELIMITED_STRING_MODE);
}
Expand All @@ -299,7 +299,7 @@ QUOTED_NON_EXPANDED_REGULAR_EXPRESSION_START
: '%r' {!Character.isAlphabetic(_input.LA(1))}?
{
pushNonExpandedDelimiter(_input.LA(1));
setNonExpandedDelimitedStringEndToken(QUOTED_NON_EXPANDED_REGULAR_EXPRESSION_END);
setNonExpandedDelimiterEndToken(QUOTED_NON_EXPANDED_REGULAR_EXPRESSION_END);
_input.consume();
pushMode(NON_EXPANDED_DELIMITED_STRING_MODE);
};
Expand All @@ -311,9 +311,10 @@ QUOTED_NON_EXPANDED_REGULAR_EXPRESSION_START
QUOTED_NON_EXPANDED_STRING_ARRAY_LITERAL_START
: '%w' {!Character.isAlphabetic(_input.LA(1))}?
{
pushQuotedNonExpandedStringArrayDelimiter(_input.LA(1));
pushNonExpandedDelimiter(_input.LA(1));
setNonExpandedDelimiterEndToken(QUOTED_NON_EXPANDED_STRING_ARRAY_LITERAL_END);
_input.consume();
pushMode(QUOTED_NON_EXPANDED_STRING_ARRAY_MODE);
pushMode(NON_EXPANDED_DELIMITED_ARRAY_MODE);
}
;

Expand All @@ -324,9 +325,10 @@ QUOTED_NON_EXPANDED_STRING_ARRAY_LITERAL_START
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_LITERAL_START
: '%i' {!Character.isAlphabetic(_input.LA(1))}?
{
pushQuotedNonExpandedSymbolArrayDelimiter(_input.LA(1));
pushNonExpandedDelimiter(_input.LA(1));
setNonExpandedDelimiterEndToken(QUOTED_NON_EXPANDED_SYMBOL_ARRAY_LITERAL_END);
_input.consume();
pushMode(QUOTED_NON_EXPANDED_SYMBOL_ARRAY_MODE);
pushMode(NON_EXPANDED_DELIMITED_ARRAY_MODE);
}
;

Expand Down Expand Up @@ -684,69 +686,12 @@ NON_EXPANDED_LITERAL_CHARACTER
;
// --------------------------------------------------------
// %w string (word) array mode
// --------------------------------------------------------
mode QUOTED_NON_EXPANDED_STRING_ARRAY_MODE;
fragment QUOTED_NON_EXPANDED_ESCAPED_STRING_ARRAY_CHARACTER
: '\\' QUOTED_NON_EXPANDED_NON_ESCAPED_STRING_ARRAY_CHARACTER
;
fragment QUOTED_NON_EXPANDED_NON_ESCAPED_STRING_ARRAY_CHARACTER
: ~[\r\n]
| '\n' {_input.LA(1) != '\r'}?
;
fragment QUOTED_NON_EXPANDED_STRING_ARRAY_DELIMITER
: [\u0009]
| [\u000b]
| [\u000c]
| [\u000d]
| [\u0020]
| '\\' ('\r'? '\n')
;
QUOTED_NON_EXPANDED_STRING_ARRAY_SEPARATOR
: QUOTED_NON_EXPANDED_STRING_ARRAY_DELIMITER+
;
QUOTED_NON_EXPANDED_STRING_ARRAY_CHARACTER
: QUOTED_NON_EXPANDED_ESCAPED_STRING_ARRAY_CHARACTER
| QUOTED_NON_EXPANDED_NON_ESCAPED_STRING_ARRAY_CHARACTER
{
int readChar = _input.LA(-1);
if (isQuotedNonExpandedStringArrayClosingDelimiter(readChar)) {
popQuotedNonExpandedStringArrayDelimiter();
if (isQuotedNonExpandedStringArrayDelimitersEmpty()) {
setType(QUOTED_NON_EXPANDED_STRING_ARRAY_LITERAL_END);
popMode();
}
}
else if (isQuotedNonExpandedStringArrayOpeningDelimiter(readChar)) {
pushQuotedNonExpandedStringArrayDelimiter(readChar);
}
}
;
// --------------------------------------------------------
// %i symbol array mode
// Non-expanded delimited array mode
// --------------------------------------------------------
mode QUOTED_NON_EXPANDED_SYMBOL_ARRAY_MODE;
fragment QUOTED_NON_EXPANDED_ESCAPED_SYMBOL_ARRAY_CHARACTER
: '\\' QUOTED_NON_EXPANDED_NON_ESCAPED_SYMBOL_ARRAY_CHARACTER
;
fragment QUOTED_NON_EXPANDED_NON_ESCAPED_SYMBOL_ARRAY_CHARACTER
: ~[\r\n]
| '\n' {_input.LA(1) != '\r'}?
;
mode NON_EXPANDED_DELIMITED_ARRAY_MODE;
fragment QUOTED_NON_EXPANDED_SYMBOL_ARRAY_DELIMITER
fragment NON_EXPANDED_ARRAY_ITEM_DELIMITER
: [\u0009]
| [\u000a]
| [\u000b]
Expand All @@ -756,31 +701,30 @@ fragment QUOTED_NON_EXPANDED_SYMBOL_ARRAY_DELIMITER
| '\\' ('\r'? '\n')
;
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_SEPARATOR
: QUOTED_NON_EXPANDED_SYMBOL_ARRAY_DELIMITER+
NON_EXPANDED_ARRAY_ITEM_SEPARATOR
: NON_EXPANDED_ARRAY_ITEM_DELIMITER+
;
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_CHARACTER
: QUOTED_NON_EXPANDED_ESCAPED_SYMBOL_ARRAY_CHARACTER
| QUOTED_NON_EXPANDED_NON_ESCAPED_SYMBOL_ARRAY_CHARACTER
NON_EXPANDED_ARRAY_ITEM_CHARACTER
: NON_EXPANDED_LITERAL_ESCAPE_SEQUENCE
| NON_ESCAPED_LITERAL_CHARACTER
{
int readChar = _input.LA(-1);
if (isQuotedNonExpandedSymbolArrayClosingDelimiter(readChar)) {
popQuotedNonExpandedSymbolArrayDelimiter();
if (isNonExpandedClosingDelimiter(readChar)) {
popNonExpandedDelimiter();
if (isQuotedNonExpandedSymbolArrayDelimitersEmpty()) {
setType(QUOTED_NON_EXPANDED_SYMBOL_ARRAY_LITERAL_END);
if (isNonExpandedDelimitersStackEmpty()) {
setType(getNonExpandedDelimitedStringEndToken());
popMode();
}
}
else if (isQuotedNonExpandedSymbolArrayOpeningDelimiter(readChar)) {
pushQuotedNonExpandedSymbolArrayDelimiter(readChar);
else if (isNonExpandedOpeningDelimiter(readChar)) {
pushNonExpandedDelimiter(readChar);
}
}
;
// --------------------------------------------------------
// Regex literal mode
// --------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -261,26 +261,26 @@ arrayConstructor
nonExpandedWordArrayElements?
QUOTED_NON_EXPANDED_STRING_ARRAY_LITERAL_END # nonExpandedWordArrayConstructor
| QUOTED_NON_EXPANDED_SYMBOL_ARRAY_LITERAL_START
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_SEPARATOR*
NON_EXPANDED_ARRAY_ITEM_SEPARATOR*
nonExpandedSymbolArrayElements?
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_SEPARATOR*
NON_EXPANDED_ARRAY_ITEM_SEPARATOR*
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_LITERAL_END # nonExpandedSymbolArrayConstructor
;

nonExpandedSymbolArrayElements
: nonExpandedSymbolArrayElement (QUOTED_NON_EXPANDED_SYMBOL_ARRAY_SEPARATOR+ nonExpandedSymbolArrayElement)*
: nonExpandedSymbolArrayElement (NON_EXPANDED_ARRAY_ITEM_SEPARATOR+ nonExpandedSymbolArrayElement)*
;

nonExpandedSymbolArrayElement
: QUOTED_NON_EXPANDED_SYMBOL_ARRAY_CHARACTER+
: NON_EXPANDED_ARRAY_ITEM_CHARACTER+
;

nonExpandedWordArrayElements
: nonExpandedWordArrayElement (QUOTED_NON_EXPANDED_STRING_ARRAY_SEPARATOR nonExpandedWordArrayElement)*
: nonExpandedWordArrayElement (NON_EXPANDED_ARRAY_ITEM_SEPARATOR+ nonExpandedWordArrayElement)*
;

nonExpandedWordArrayElement
: QUOTED_NON_EXPANDED_STRING_ARRAY_CHARACTER+
: NON_EXPANDED_ARRAY_ITEM_CHARACTER+
;


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package io.joern.rubysrc2cpg.parser

import scala.collection.mutable

trait NonExpandedDelimitedStringHandling { this: RubyLexerBase =>
trait NonExpandedDelimiterHandling { this: RubyLexerBase =>

private val delimiters = mutable.Stack[Int]()
private var endTokenType = 0
Expand All @@ -27,15 +27,11 @@ trait NonExpandedDelimitedStringHandling { this: RubyLexerBase =>
char == currentClosingDelimiter()
}

def isNonExpandedDelimiter(char: Int): Boolean = {
isNonExpandedOpeningDelimiter(char) || isNonExpandedClosingDelimiter(char)
}

private def currentOpeningDelimiter(): Int = {
delimiters.top
}

def setNonExpandedDelimitedStringEndToken(endTokenType: Int): Unit = {
def setNonExpandedDelimiterEndToken(endTokenType: Int): Unit = {
this.endTokenType = endTokenType
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@ abstract class RubyLexerBase(input: CharStream)
extends Lexer(input)
with RubyLexerRegexHandling
with RubyLexerStringInterpolationHandling
with RubyLexerQuotedNonExpandedStringArrayHandling
with RubyLexerQuotedNonExpandedSymbolArrayHandling
with NonExpandedDelimitedStringHandling {
with NonExpandedDelimiterHandling {

/** The previously (non-WS) emitted token (in DEFAULT_CHANNEL.) */
protected var previousNonWsToken: Option[Token] = None
Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,7 @@ class RubyLexerTests extends AnyFlatSpec with Matchers {
Seq("%w(x)", "%w[y]", "%w{z}", "%w<w>", "%w#a#", "%w!b!", "%w-_-", "%w@c@", "%w+d+", "%w*e*", "%w/#/", "%w&!&")
all(eg.map(tokenize)) shouldBe Seq(
QUOTED_NON_EXPANDED_STRING_ARRAY_LITERAL_START,
QUOTED_NON_EXPANDED_STRING_ARRAY_CHARACTER,
NON_EXPANDED_ARRAY_ITEM_CHARACTER,
QUOTED_NON_EXPANDED_STRING_ARRAY_LITERAL_END,
EOF
)
Expand All @@ -616,10 +616,10 @@ class RubyLexerTests extends AnyFlatSpec with Matchers {
)
all(eg.map(tokenize)) shouldBe Seq(
QUOTED_NON_EXPANDED_STRING_ARRAY_LITERAL_START,
QUOTED_NON_EXPANDED_STRING_ARRAY_CHARACTER,
QUOTED_NON_EXPANDED_STRING_ARRAY_CHARACTER,
QUOTED_NON_EXPANDED_STRING_ARRAY_SEPARATOR,
QUOTED_NON_EXPANDED_STRING_ARRAY_CHARACTER,
NON_EXPANDED_ARRAY_ITEM_CHARACTER,
NON_EXPANDED_ARRAY_ITEM_CHARACTER,
NON_EXPANDED_ARRAY_ITEM_SEPARATOR,
NON_EXPANDED_ARRAY_ITEM_CHARACTER,
QUOTED_NON_EXPANDED_STRING_ARRAY_LITERAL_END,
EOF
)
Expand All @@ -629,9 +629,9 @@ class RubyLexerTests extends AnyFlatSpec with Matchers {
val code = """%w[x\ y]"""
tokenize(code) shouldBe Seq(
QUOTED_NON_EXPANDED_STRING_ARRAY_LITERAL_START,
QUOTED_NON_EXPANDED_STRING_ARRAY_CHARACTER,
QUOTED_NON_EXPANDED_STRING_ARRAY_CHARACTER,
QUOTED_NON_EXPANDED_STRING_ARRAY_CHARACTER,
NON_EXPANDED_ARRAY_ITEM_CHARACTER,
NON_EXPANDED_ARRAY_ITEM_CHARACTER,
NON_EXPANDED_ARRAY_ITEM_CHARACTER,
QUOTED_NON_EXPANDED_STRING_ARRAY_LITERAL_END,
EOF
)
Expand All @@ -651,7 +651,7 @@ class RubyLexerTests extends AnyFlatSpec with Matchers {
Seq("%i(x)", "%i[y]", "%i{z}", "%i<w>", "%i#a#", "%i!b!", "%i-_-", "%i@c@", "%i+d+", "%i*e*", "%i/#/", "%i&!&")
all(eg.map(tokenize)) shouldBe Seq(
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_LITERAL_START,
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_CHARACTER,
NON_EXPANDED_ARRAY_ITEM_CHARACTER,
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_LITERAL_END,
EOF
)
Expand All @@ -674,10 +674,10 @@ class RubyLexerTests extends AnyFlatSpec with Matchers {
)
all(eg.map(tokenize)) shouldBe Seq(
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_LITERAL_START,
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_CHARACTER,
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_CHARACTER,
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_SEPARATOR,
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_CHARACTER,
NON_EXPANDED_ARRAY_ITEM_CHARACTER,
NON_EXPANDED_ARRAY_ITEM_CHARACTER,
NON_EXPANDED_ARRAY_ITEM_SEPARATOR,
NON_EXPANDED_ARRAY_ITEM_CHARACTER,
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_LITERAL_END,
EOF
)
Expand All @@ -691,11 +691,11 @@ class RubyLexerTests extends AnyFlatSpec with Matchers {
|)""".stripMargin
tokenize(code) shouldBe Seq(
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_LITERAL_START,
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_SEPARATOR,
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_CHARACTER,
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_SEPARATOR,
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_CHARACTER,
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_SEPARATOR,
NON_EXPANDED_ARRAY_ITEM_SEPARATOR,
NON_EXPANDED_ARRAY_ITEM_CHARACTER,
NON_EXPANDED_ARRAY_ITEM_SEPARATOR,
NON_EXPANDED_ARRAY_ITEM_CHARACTER,
NON_EXPANDED_ARRAY_ITEM_SEPARATOR,
QUOTED_NON_EXPANDED_SYMBOL_ARRAY_LITERAL_END,
EOF
)
Expand Down

0 comments on commit 194e0f4

Please sign in to comment.