Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented mapping syntax in peg grammars #433

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/js/benchmark-bundle.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/js/test-bundle.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/vendor/peggy/peggy.min.js

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions examples/javascript.pegjs
Original file line number Diff line number Diff line change
Expand Up @@ -303,12 +303,12 @@ SingleEscapeCharacter
= "'"
/ '"'
/ "\\"
/ "b" { return "\b"; }
/ "f" { return "\f"; }
/ "n" { return "\n"; }
/ "r" { return "\r"; }
/ "t" { return "\t"; }
/ "v" { return "\v"; }
/ "b"%"\b"
/ "f"%"\f"
/ "n"%"\n"
/ "r"%"\r"
/ "t"%"\t"
/ "v"%"\v"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't find this easier to read or maintain. Are there other examples you think might benefit from this approach? Maybe add a new example that shows the full power?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. In any case, it would be better to use -> or => as the operator.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, if it were => I might be able to get excited about this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Particularly if we also added &=> and !=> at the same time...


NonEscapeCharacter
= !(EscapeCharacter / LineTerminator) SourceCharacter { return text(); }
Expand Down
10 changes: 5 additions & 5 deletions examples/json.pegjs
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,11 @@ char
'"'
/ "\\"
/ "/"
/ "b" { return "\b"; }
/ "f" { return "\f"; }
/ "n" { return "\n"; }
/ "r" { return "\r"; }
/ "t" { return "\t"; }
/ "b"%"\b"
/ "f"%"\f"
/ "n"%"\n"
/ "r"%"\r"
/ "t"%"\t"
/ "u" digits:$(HEXDIG HEXDIG HEXDIG HEXDIG) {
return String.fromCharCode(parseInt(digits, 16));
}
Expand Down
3 changes: 3 additions & 0 deletions lib/compiler/opcodes.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ const opcodes = {
MATCH_REGEXP: 20, // MATCH_REGEXP r, a, f, ...
ACCEPT_N: 21, // ACCEPT_N n
ACCEPT_STRING: 22, // ACCEPT_STRING s
MAP_STRING_N: 41, // MAP_STRING_N s, n
FAIL: 23, // FAIL e

// Calls
Expand Down Expand Up @@ -76,6 +77,8 @@ const opcodes = {
SOURCE_MAP_POP: 38, // SOURCE_MAP_POP
SOURCE_MAP_LABEL_PUSH: 39, // SOURCE_MAP_LABEL_PUSH sp, literal-index, loc-index
SOURCE_MAP_LABEL_POP: 40, // SOURCE_MAP_LABEL_POP sp

// MAP_STRING_N: 41, // MAP_STRING_N s, n
};

module.exports = opcodes;
22 changes: 17 additions & 5 deletions lib/compiler/passes/generate-bytecode.js
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,11 @@ const { ALWAYS_MATCH, SOMETIMES_MATCH, NEVER_MATCH } = require("./inference-matc
// stack.push(literals[s]);
// currPos += literals[s].length;
//
// [41] MAP_STRING_N s, n
//
// stack.push(literals[s]);
// currPos += n;
//
// [23] FAIL e
//
// stack.push(FAILED);
Expand Down Expand Up @@ -969,17 +974,22 @@ function generateBytecode(ast, options) {
},

literal(node) {
if (node.value.length > 0) {
const isMapped = node.mappedValue !== null;
if (isMapped || node.value.length > 0) {
const match = node.match | 0;
// String only required if condition is generated or string is
// case-sensitive and node always match
const needConst = match === SOMETIMES_MATCH
const needConst = isMapped
|| match === SOMETIMES_MATCH
|| (match === ALWAYS_MATCH && !node.ignoreCase);
const stringIndex = needConst
? addLiteralConst(
node.ignoreCase ? node.value.toLowerCase() : node.value
)
: null;
const outIndex = isMapped
? addLiteralConst(node.mappedValue)
: null;
// Expectation not required if node always match
const expectedIndex = (match !== ALWAYS_MATCH)
? addExpectedConst({
Expand All @@ -997,9 +1007,11 @@ function generateBytecode(ast, options) {
node.ignoreCase
? [op.MATCH_STRING_IC, stringIndex]
: [op.MATCH_STRING, stringIndex],
node.ignoreCase
? [op.ACCEPT_N, node.value.length]
: [op.ACCEPT_STRING, stringIndex],
(isMapped
&& [op.MAP_STRING_N, outIndex, node.value.length])
|| (node.ignoreCase
? [op.ACCEPT_N, node.value.length]
: [op.ACCEPT_STRING, stringIndex]),
[op.FAIL, expectedIndex]
);
}
Expand Down
13 changes: 13 additions & 0 deletions lib/compiler/passes/generate-js.js
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,9 @@ function generateJS(ast, options) {

function generateTables() {
// Renders one entry of the literals table as JavaScript source text.
// A plain string becomes a double-quoted, escaped string literal; an array
// becomes an array literal whose elements are rendered recursively.
function buildLiteral(literal) {
  if (!Array.isArray(literal)) {
    return "\"" + stringEscape(literal) + "\"";
  }

  const renderedItems = literal.map(item => buildLiteral(item));
  return "[" + renderedItems.join(",") + "]";
}

Expand Down Expand Up @@ -558,6 +561,16 @@ function generateJS(ast, options) {
ip += 2;
break;

case op.MAP_STRING_N: // MAP_STRING_N s, n
parts.push(stack.push(l(bc[ip + 1])));
parts.push(
bc[ip + 2] > 1
? "peg$currPos += " + bc[ip + 2] + ";"
: "peg$currPos++;"
);
ip += 3;
break;

case op.FAIL: // FAIL e
parts.push(stack.push("peg$FAILED"));
parts.push("if (peg$silentFails === 0) { peg$fail(" + e(bc[ip + 1]) + "); }");
Expand Down
627 changes: 331 additions & 296 deletions lib/parser.js

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions lib/peg.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ declare namespace ast {
interface Literal extends Expr<"literal"> {
  /** Sequence of symbols to match. */
  value: string;
  /**
   * Value to output instead of the matched text: either a single string or
   * a list of strings. The grammar parser sets this to `null` when the
   * literal has no mapping; hand-built ASTs may omit it.
   */
  mappedValue?: string | string[] | null;
  /** If `true`, symbols match even if their case differs from the case used in `value`. */
  ignoreCase: boolean;
}
Expand Down
61 changes: 25 additions & 36 deletions src/parser.pegjs
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,6 @@
//
// [1] http://www.ecma-international.org/publications/standards/Ecma-262.htm

{{
const OPS_TO_PREFIXED_TYPES = {
"$": "text",
"&": "simple_and",
"!": "simple_not"
};

const OPS_TO_SUFFIXED_TYPES = {
"?": "optional",
"*": "zero_or_more",
"+": "one_or_more"
};

const OPS_TO_SEMANTIC_PREDICATE_TYPES = {
"&": "semantic_and",
"!": "semantic_not"
};
}}
{
// Cannot use Set here because of native IE support.
const reservedWords = options.reservedWords || [];
Expand Down Expand Up @@ -177,22 +159,23 @@ LabelColon
PrefixedExpression
= operator:PrefixedOperator __ expression:SuffixedExpression {
return {
type: OPS_TO_PREFIXED_TYPES[operator],
type: operator,
expression,
location: location()
};
}
/ SuffixedExpression

PrefixedOperator
= "$"
/ "&"
/ "!"
= "$"%"text"
/ "&"%"simple_and"
/ "!"%"simple_not"


SuffixedExpression
= expression:PrimaryExpression __ operator:SuffixedOperator {
return {
type: OPS_TO_SUFFIXED_TYPES[operator],
type: operator,
expression,
location: location()
};
Expand All @@ -201,9 +184,9 @@ SuffixedExpression
/ PrimaryExpression

SuffixedOperator
= "?"
/ "*"
/ "+"
= "?"%"optional"
/ "*"%"zero_or_more"
/ "+"%"one_or_more"

RepeatedExpression
= expression:PrimaryExpression __ "|" __ boundaries:Boundaries __ delimiter:("," __ @Expression __)? "|" {
Expand Down Expand Up @@ -268,16 +251,16 @@ RuleReferenceExpression
SemanticPredicateExpression
= operator:SemanticPredicateOperator __ code:CodeBlock {
return {
type: OPS_TO_SEMANTIC_PREDICATE_TYPES[operator],
type: operator,
code: code[0],
codeLocation: code[1],
location: location()
};
}

SemanticPredicateOperator
= "&"
/ "!"
= "&"%"semantic_and"
/ "!"%"semantic_not"

// ---- Lexical Grammar -----

Expand Down Expand Up @@ -354,15 +337,21 @@ UnicodeConnectorPunctuation
= Pc

LiteralMatcher "literal"
= value:StringLiteral ignoreCase:"i"? {
= value:StringLiteral ignoreCase:"i"? mappedValue:("%" @MapValue)? {
return {
type: "literal",
value,
ignoreCase: ignoreCase !== null,
mappedValue,
location: location()
};

}

// Replacement value that follows `%` in a LiteralMatcher. It is either a
// single string literal, or a bracketed list of zero or more string literals
// (e.g. `[]` or `['EE' 'FF']`), which yields an array of the plucked strings.
// NOTE(review): `__` is presumably the optional-whitespace rule; it is
// defined outside this hunk.
MapValue
= StringLiteral
/ "[" __ @(@StringLiteral __)* "]"

StringLiteral "string"
= '"' chars:DoubleStringCharacter* '"' { return chars.join(""); }
/ "'" chars:SingleStringCharacter* "'" { return chars.join(""); }
Expand Down Expand Up @@ -426,12 +415,12 @@ SingleEscapeCharacter
= "'"
/ '"'
/ "\\"
/ "b" { return "\b"; }
/ "f" { return "\f"; }
/ "n" { return "\n"; }
/ "r" { return "\r"; }
/ "t" { return "\t"; }
/ "v" { return "\v"; }
/ "b"%"\b"
/ "f"%"\f"
/ "n"%"\n"
/ "r"%"\r"
/ "t"%"\t"
/ "v"%"\v"

NonEscapeCharacter
= $(!(EscapeCharacter / LineTerminator) SourceCharacter)
Expand Down
2 changes: 1 addition & 1 deletion test/api/plugin-api.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ describe("plugin API", () => {
" {",
" type: 'rule',",
" name: 'start',",
" expression: { type: 'literal', value: text(), ignoreCase: false }",
" expression: { type: 'literal', value: text(), ignoreCase: false, mappedValue: null }",
" }",
" ]",
" };",
Expand Down
25 changes: 17 additions & 8 deletions test/unit/compiler/passes/generate-bytecode.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,29 +30,38 @@ describe("compiler pass |generateBytecode|", () => {
it("generates correct bytecode", () => {
expect(pass).to.changeAST([
"a = 'a'",
"b = 'b'",
"c = 'c'",
"b = 'b'%'BB'",
"c = 'c'i%'CC'",
"d = 'd'%[]",
"e = 'e'%['EE' 'FF']",
].join("\n"), {
literals: ["a", "b", "BB", "c", "CC", "d", [], "e", ["EE", "FF"]],
rules: [
{ bytecode: [18, 0, 2, 2, 22, 0, 23, 0] },
{ bytecode: [18, 1, 2, 2, 22, 1, 23, 1] },
{ bytecode: [18, 2, 2, 2, 22, 2, 23, 2] },
{ bytecode: [18, 1, 3, 2, 41, 2, 1, 23, 1] },
{ bytecode: [19, 3, 3, 2, 41, 4, 1, 23, 2] },
{ bytecode: [18, 5, 3, 2, 41, 6, 1, 23, 3] },
{ bytecode: [18, 7, 3, 2, 41, 8, 1, 23, 4] },
],
});
});

it("defines correct constants", () => {
expect(pass).to.changeAST([
"a = 'a'",
"b = 'b'",
"c = 'c'",
"b = 'b'%'BB'",
"c = 'c'i%'CC'",
"d = 'd'%[]",
"e = 'e'%['EE' 'FF']",
].join("\n"), constsDetails(
["a", "b", "c"],
["a", "b", "BB", "c", "CC", "d", [], "e", ["EE", "FF"]],
[],
[
{ type: "literal", value: "a", ignoreCase: false },
{ type: "literal", value: "b", ignoreCase: false },
{ type: "literal", value: "c", ignoreCase: false },
{ type: "literal", value: "c", ignoreCase: true },
{ type: "literal", value: "d", ignoreCase: false },
{ type: "literal", value: "e", ignoreCase: false },
],
[]
));
Expand Down
23 changes: 17 additions & 6 deletions test/unit/parser.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ const parser = require("../../lib/parser");
const expect = chai.expect;

describe("Peggy grammar parser", () => {
const literalAbcd = { type: "literal", value: "abcd", ignoreCase: false };
const literalEfgh = { type: "literal", value: "efgh", ignoreCase: false };
const literalIjkl = { type: "literal", value: "ijkl", ignoreCase: false };
const literalMnop = { type: "literal", value: "mnop", ignoreCase: false };
const literalAbcd = { type: "literal", value: "abcd", ignoreCase: false, mappedValue: null };
const literalEfgh = { type: "literal", value: "efgh", ignoreCase: false, mappedValue: null };
const literalIjkl = { type: "literal", value: "ijkl", ignoreCase: false, mappedValue: null };
const literalMnop = { type: "literal", value: "mnop", ignoreCase: false, mappedValue: null };
const mappedAbcd = { type: "literal", value: "ABCD", ignoreCase: true, mappedValue: "dcba" };
const semanticAnd = { type: "semantic_and", code: " code " };
const semanticNot = { type: "semantic_not", code: " code " };
const optional = { type: "optional", expression: literalAbcd };
Expand Down Expand Up @@ -78,9 +79,9 @@ describe("Peggy grammar parser", () => {
);
}

function literalGrammar(value, ignoreCase) {
function literalGrammar(value, ignoreCase, mappedValue = null) {
return oneRuleGrammar(
{ type: "literal", value, ignoreCase }
{ type: "literal", value, ignoreCase, mappedValue }
);
}

Expand Down Expand Up @@ -330,6 +331,13 @@ describe("Peggy grammar parser", () => {
);
});

// Canonical mapped literal is "'ABCD'i%'dcba'" (matches "abcd" case-insensitively, outputs "dcba").
it("parses Mapped Literal", () => {
expect("start = 'ABCD'i%'dcba'").to.parseAs(
oneRuleGrammar(mappedAbcd)
);
});

// Canonical ChoiceExpression is "'abcd' / 'efgh' / 'ijkl'".
it("parses ChoiceExpression", () => {
expect("start = 'abcd' { code }").to.parseAs(
Expand Down Expand Up @@ -1187,6 +1195,7 @@ c = @'ijkl'
type: "literal",
value: "abcd",
ignoreCase: false,
mappedValue: null,
location: {
source: undefined,
start: { offset: 57, line: 8, column: 11 },
Expand Down Expand Up @@ -1275,6 +1284,7 @@ c = @'ijkl'
type: "literal",
value: "efgh",
ignoreCase: false,
mappedValue: null,
location: {
source: undefined,
start: { offset: 134, line: 9, column: 12 },
Expand Down Expand Up @@ -1322,6 +1332,7 @@ c = @'ijkl'
type: "literal",
value: "ijkl",
ignoreCase: false,
mappedValue: null,
location: {
source: undefined,
start: { offset: 146, line: 10, column: 6 },
Expand Down