peggyjs · QaDeS · Sep 1, 2023 · Sep 1, 2023 · Sep 1, 2023 · Sep 1, 2023
diff --git a/docs/js/benchmark-bundle.min.js b/docs/js/benchmark-bundle.min.js
diff --git a/docs/js/test-bundle.min.js b/docs/js/test-bundle.min.js
diff --git a/docs/vendor/peggy/peggy.min.js b/docs/vendor/peggy/peggy.min.js
diff --git a/examples/javascript.pegjs b/examples/javascript.pegjs
@@ -303,12 +303,12 @@ SingleEscapeCharacter
   = "'"
   / '"'
   / "\\"
-  / "b"  { return "\b"; }
-  / "f"  { return "\f"; }
-  / "n"  { return "\n"; }
-  / "r"  { return "\r"; }
-  / "t"  { return "\t"; }
-  / "v"  { return "\v"; }
+  / "b"%"\b"
+  / "f"%"\f"
+  / "n"%"\n"
+  / "r"%"\r"
+  / "t"%"\t"
+  / "v"%"\v"
 
 NonEscapeCharacter
   = !(EscapeCharacter / LineTerminator) SourceCharacter { return text(); }

diff --git a/examples/json.pegjs b/examples/json.pegjs
@@ -123,11 +123,11 @@ char
         '"'
       / "\\"
       / "/"
-      / "b" { return "\b"; }
-      / "f" { return "\f"; }
-      / "n" { return "\n"; }
-      / "r" { return "\r"; }
-      / "t" { return "\t"; }
+      / "b"%"\b"
+      / "f"%"\f"
+      / "n"%"\n"
+      / "r"%"\r"
+      / "t"%"\t"
       / "u" digits:$(HEXDIG HEXDIG HEXDIG HEXDIG) {
           return String.fromCharCode(parseInt(digits, 16));
         }

diff --git a/lib/compiler/opcodes.js b/lib/compiler/opcodes.js
@@ -42,6 +42,7 @@ const opcodes = {
   MATCH_REGEXP:     20,    // MATCH_REGEXP r, a, f, ...
   ACCEPT_N:         21,    // ACCEPT_N n
   ACCEPT_STRING:    22,    // ACCEPT_STRING s
+  MAP_STRING_N:     41,    // MAP_STRING_N s, n
   FAIL:             23,    // FAIL e
 
   // Calls
@@ -76,6 +77,8 @@ const opcodes = {
   SOURCE_MAP_POP:          38,   // SOURCE_MAP_POP
   SOURCE_MAP_LABEL_PUSH:   39,   // SOURCE_MAP_LABEL_PUSH sp, literal-index, loc-index
   SOURCE_MAP_LABEL_POP:    40,   // SOURCE_MAP_LABEL_POP sp
+
+  // 41 is taken by MAP_STRING_N (declared above, next to ACCEPT_STRING).
 };
 
 module.exports = opcodes;
diff --git a/lib/compiler/passes/generate-bytecode.js b/lib/compiler/passes/generate-bytecode.js
@@ -189,6 +189,11 @@ const { ALWAYS_MATCH, SOMETIMES_MATCH, NEVER_MATCH } = require("./inference-matc
 //        stack.push(literals[s]);
 //        currPos += literals[s].length;
 //
+// [41] MAP_STRING_N s, n
+//
+//        stack.push(literals[s]);
+//        currPos += n;
+//
 // [23] FAIL e
 //
 //        stack.push(FAILED);
@@ -969,17 +974,22 @@ function generateBytecode(ast, options) {
     },
 
     literal(node) {
-      if (node.value.length > 0) {
+      const isMapped = node.mappedValue !== null && node.mappedValue !== undefined;
+      if (isMapped || node.value.length > 0) {
         const match = node.match | 0;
         // String only required if condition is generated or string is
         // case-sensitive and node always match
-        const needConst = match === SOMETIMES_MATCH
+        const needConst = isMapped
+                      || match === SOMETIMES_MATCH
                       || (match === ALWAYS_MATCH && !node.ignoreCase);
         const stringIndex = needConst
           ? addLiteralConst(
             node.ignoreCase ? node.value.toLowerCase() : node.value
           )
           : null;
+        const outIndex = isMapped
+          ? addLiteralConst(node.mappedValue)
+          : null;
         // Expectation not required if node always match
         const expectedIndex = (match !== ALWAYS_MATCH)
           ? addExpectedConst({
@@ -997,9 +1007,11 @@ function generateBytecode(ast, options) {
           node.ignoreCase
             ? [op.MATCH_STRING_IC, stringIndex]
             : [op.MATCH_STRING, stringIndex],
-          node.ignoreCase
-            ? [op.ACCEPT_N, node.value.length]
-            : [op.ACCEPT_STRING, stringIndex],
+          (isMapped
+            && [op.MAP_STRING_N, outIndex, node.value.length])
+            || (node.ignoreCase
+              ? [op.ACCEPT_N, node.value.length]
+              : [op.ACCEPT_STRING, stringIndex]),
           [op.FAIL, expectedIndex]
         );
       }

diff --git a/lib/compiler/passes/generate-js.js b/lib/compiler/passes/generate-js.js
@@ -150,6 +150,9 @@ function generateJS(ast, options) {
 
   function generateTables() {
     function buildLiteral(literal) {
+      if (Array.isArray(literal)) {
+        return "[" + literal.map(buildLiteral).join(",") + "]";
+      }
       return "\"" + stringEscape(literal) + "\"";
     }
 
@@ -558,6 +561,16 @@ function generateJS(ast, options) {
             ip += 2;
             break;
 
+          case op.MAP_STRING_N:      // MAP_STRING_N s, n
+            parts.push(stack.push(l(bc[ip + 1])));
+            if (bc[ip + 2] > 0) {      // n is 0 for a mapped empty literal
+              parts.push(bc[ip + 2] > 1
+                ? "peg$currPos += " + bc[ip + 2] + ";"
+                : "peg$currPos++;");
+            }
+            ip += 3;
+            break;
+
           case op.FAIL:               // FAIL e
             parts.push(stack.push("peg$FAILED"));
             parts.push("if (peg$silentFails === 0) { peg$fail(" + e(bc[ip + 1]) + "); }");

diff --git a/lib/parser.js b/lib/parser.js
diff --git a/lib/peg.d.ts b/lib/peg.d.ts
@@ -281,6 +281,8 @@ declare namespace ast {
   interface Literal extends Expr<"literal"> {
     /** Sequence of symbols to match. */
     value: string;
+    /** Value (or list of values) to output instead of the matched text; `null` or absent when the literal is not mapped. */
+    mappedValue?: string | string[] | null;
     /** If `true`, symbols matches even if they case do not match case in the `value`. */
     ignoreCase: boolean;
   }

diff --git a/src/parser.pegjs b/src/parser.pegjs
@@ -21,24 +21,6 @@
 //
 // [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm
 
-{{
-  const OPS_TO_PREFIXED_TYPES = {
-    "$": "text",
-    "&": "simple_and",
-    "!": "simple_not"
-  };
-
-  const OPS_TO_SUFFIXED_TYPES = {
-    "?": "optional",
-    "*": "zero_or_more",
-    "+": "one_or_more"
-  };
-
-  const OPS_TO_SEMANTIC_PREDICATE_TYPES = {
-    "&": "semantic_and",
-    "!": "semantic_not"
-  };
-}}
 {
   // Cannot use Set here because of native IE support.
   const reservedWords = options.reservedWords || [];
@@ -177,22 +159,23 @@ LabelColon
 PrefixedExpression
   = operator:PrefixedOperator __ expression:SuffixedExpression {
       return {
-        type: OPS_TO_PREFIXED_TYPES[operator],
+        type: operator,
         expression,
         location: location()
       };
     }
   / SuffixedExpression
 
 PrefixedOperator
-  = "$"
-  / "&"
-  / "!"
+  = "$"%"text"
+  / "&"%"simple_and"
+  / "!"%"simple_not"
+
 
 SuffixedExpression
   = expression:PrimaryExpression __ operator:SuffixedOperator {
       return {
-        type: OPS_TO_SUFFIXED_TYPES[operator],
+        type: operator,
         expression,
         location: location()
       };
@@ -201,9 +184,9 @@ SuffixedExpression
   / PrimaryExpression
 
 SuffixedOperator
-  = "?"
-  / "*"
-  / "+"
+  = "?"%"optional"
+  / "*"%"zero_or_more"
+  / "+"%"one_or_more"
 
 RepeatedExpression
   = expression:PrimaryExpression __ "|" __ boundaries:Boundaries __ delimiter:("," __ @Expression __)? "|" {
@@ -268,16 +251,16 @@ RuleReferenceExpression
 SemanticPredicateExpression
   = operator:SemanticPredicateOperator __ code:CodeBlock {
       return {
-        type: OPS_TO_SEMANTIC_PREDICATE_TYPES[operator],
+        type: operator,
         code: code[0],
         codeLocation: code[1],
         location: location()
       };
     }
 
 SemanticPredicateOperator
-  = "&"
-  / "!"
+  = "&"%"semantic_and"
+  / "!"%"semantic_not"
 
 // ---- Lexical Grammar -----
 
@@ -354,15 +337,21 @@ UnicodeConnectorPunctuation
   = Pc
 
 LiteralMatcher "literal"
-  = value:StringLiteral ignoreCase:"i"? {
+  = value:StringLiteral ignoreCase:"i"? mappedValue:("%" @MapValue)? {
       return {
         type: "literal",
         value,
         ignoreCase: ignoreCase !== null,
+        mappedValue,
         location: location()
       };
+
     }
 
+MapValue
+  = StringLiteral
+  / "[" __ @(@StringLiteral __)* "]"
+
 StringLiteral "string"
   = '"' chars:DoubleStringCharacter* '"' { return chars.join(""); }
   / "'" chars:SingleStringCharacter* "'" { return chars.join(""); }
@@ -426,12 +415,12 @@ SingleEscapeCharacter
   = "'"
   / '"'
   / "\\"
-  / "b"  { return "\b"; }
-  / "f"  { return "\f"; }
-  / "n"  { return "\n"; }
-  / "r"  { return "\r"; }
-  / "t"  { return "\t"; }
-  / "v"  { return "\v"; }
+  / "b"%"\b"
+  / "f"%"\f"
+  / "n"%"\n"
+  / "r"%"\r"
+  / "t"%"\t"
+  / "v"%"\v"
 
 NonEscapeCharacter
   = $(!(EscapeCharacter / LineTerminator) SourceCharacter)

diff --git a/test/api/plugin-api.spec.js b/test/api/plugin-api.spec.js
@@ -85,7 +85,7 @@ describe("plugin API", () => {
             "      {",
             "        type: 'rule',",
             "        name: 'start',",
-            "        expression: { type: 'literal',  value: text(), ignoreCase: false }",
+            "        expression: { type: 'literal',  value: text(), ignoreCase: false, mappedValue: null }",
             "      }",
             "    ]",
             "  };",

diff --git a/test/unit/compiler/passes/generate-bytecode.spec.js b/test/unit/compiler/passes/generate-bytecode.spec.js
@@ -30,29 +30,38 @@ describe("compiler pass |generateBytecode|", () => {
     it("generates correct bytecode", () => {
       expect(pass).to.changeAST([
         "a = 'a'",
-        "b = 'b'",
-        "c = 'c'",
+        "b = 'b'%'BB'",
+        "c = 'c'i%'CC'",
+        "d = 'd'%[]",
+        "e = 'e'%['EE' 'FF']",
       ].join("\n"), {
+        literals: ["a", "b", "BB", "c", "CC", "d", [], "e", ["EE", "FF"]],
         rules: [
           { bytecode: [18, 0, 2, 2, 22, 0, 23, 0] },
-          { bytecode: [18, 1, 2, 2, 22, 1, 23, 1] },
-          { bytecode: [18, 2, 2, 2, 22, 2, 23, 2] },
+          { bytecode: [18, 1, 3, 2, 41, 2, 1, 23, 1] },
+          { bytecode: [19, 3, 3, 2, 41, 4, 1, 23, 2] },
+          { bytecode: [18, 5, 3, 2, 41, 6, 1, 23, 3] },
+          { bytecode: [18, 7, 3, 2, 41, 8, 1, 23, 4] },
         ],
       });
     });
 
     it("defines correct constants", () => {
       expect(pass).to.changeAST([
         "a = 'a'",
-        "b = 'b'",
-        "c = 'c'",
+        "b = 'b'%'BB'",
+        "c = 'c'i%'CC'",
+        "d = 'd'%[]",
+        "e = 'e'%['EE' 'FF']",
       ].join("\n"), constsDetails(
-        ["a", "b", "c"],
+        ["a", "b", "BB", "c", "CC", "d", [], "e", ["EE", "FF"]],
         [],
         [
           { type: "literal", value: "a", ignoreCase: false },
           { type: "literal", value: "b", ignoreCase: false },
-          { type: "literal", value: "c", ignoreCase: false },
+          { type: "literal", value: "c", ignoreCase: true },
+          { type: "literal", value: "d", ignoreCase: false },
+          { type: "literal", value: "e", ignoreCase: false },
         ],
         []
       ));

diff --git a/test/unit/parser.spec.js b/test/unit/parser.spec.js
@@ -6,10 +6,11 @@ const parser = require("../../lib/parser");
 const expect = chai.expect;
 
 describe("Peggy grammar parser", () => {
-  const literalAbcd       = { type: "literal",      value: "abcd", ignoreCase: false };
-  const literalEfgh       = { type: "literal",      value: "efgh", ignoreCase: false };
-  const literalIjkl       = { type: "literal",      value: "ijkl", ignoreCase: false };
-  const literalMnop       = { type: "literal",      value: "mnop", ignoreCase: false };
+  const literalAbcd       = { type: "literal",      value: "abcd", ignoreCase: false, mappedValue: null };
+  const literalEfgh       = { type: "literal",      value: "efgh", ignoreCase: false, mappedValue: null };
+  const literalIjkl       = { type: "literal",      value: "ijkl", ignoreCase: false, mappedValue: null };
+  const literalMnop       = { type: "literal",      value: "mnop", ignoreCase: false, mappedValue: null };
+  const mappedAbcd        = { type: "literal",      value: "ABCD", ignoreCase: true, mappedValue: "dcba" };
   const semanticAnd       = { type: "semantic_and", code: " code " };
   const semanticNot       = { type: "semantic_not", code: " code " };
   const optional          = { type: "optional",     expression: literalAbcd };
@@ -78,9 +79,9 @@ describe("Peggy grammar parser", () => {
     );
   }
 
-  function literalGrammar(value, ignoreCase) {
+  function literalGrammar(value, ignoreCase, mappedValue = null) {
     return oneRuleGrammar(
-      { type: "literal", value, ignoreCase }
+      { type: "literal", value, ignoreCase, mappedValue }
     );
   }
 
@@ -330,6 +331,13 @@ describe("Peggy grammar parser", () => {
     );
   });
 
+  // Canonical mapped literal is "'ABCD'i%'dcba'".
+  it("parses Mapped Literal", () => {
+    expect("start = 'ABCD'i%'dcba'").to.parseAs(
+      oneRuleGrammar(mappedAbcd)
+    );
+  });
+
   // Canonical ChoiceExpression is "'abcd' / 'efgh' / 'ijkl'".
   it("parses ChoiceExpression", () => {
     expect("start = 'abcd' { code }").to.parseAs(
@@ -1187,6 +1195,7 @@ c = @'ijkl'
                     type: "literal",
                     value: "abcd",
                     ignoreCase: false,
+                    mappedValue: null,
                     location: {
                       source: undefined,
                       start: { offset: 57, line: 8, column: 11 },
@@ -1275,6 +1284,7 @@ c = @'ijkl'
                 type: "literal",
                 value: "efgh",
                 ignoreCase: false,
+                mappedValue: null,
                 location: {
                   source: undefined,
                   start: { offset: 134, line: 9, column: 12 },
@@ -1322,6 +1332,7 @@ c = @'ijkl'
                 type: "literal",
                 value: "ijkl",
                 ignoreCase: false,
+                mappedValue: null,
                 location: {
                   source: undefined,
                   start: { offset: 146, line: 10, column: 6 },