Skip to content

Commit

Permalink
Add CFG syntax parser.
Browse files Browse the repository at this point in the history
  • Loading branch information
q-uint committed Feb 12, 2024
1 parent 540f1a2 commit 2e44cbd
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 13 deletions.
18 changes: 5 additions & 13 deletions g_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,11 @@ var (
)

func ExampleCFG() {
S := cfg.Variable("S")
a := cfg.Terminal("a")
b := cfg.Terminal("b")
g, _ := cfg.New(
[]cfg.Variable{S},
[]cfg.Terminal{a, b},
[]cfg.Production{
cfg.NewProduction(S, []cfg.Beta{a, S, a}), // aSa
cfg.NewProduction(S, []cfg.Beta{b, S, b}), // bSb
cfg.NewProduction(S, []cfg.Beta{cfg.Epsilon}), // ε
},
S,
)
g, _ := cfg.Parse(`
S → aSa
S → bSb
S → ε
`)

in := "aabbaa"
fmt.Println(g)
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
module github.com/0x51-dev/cfg

go 1.20

require github.com/0x51-dev/upeg v0.1.1
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
github.com/0x51-dev/upeg v0.1.0 h1:ze3Uu4H9q0Fq8FuJmV1RsrTl85LholOZExCBwpujiPc=
github.com/0x51-dev/upeg v0.1.0/go.mod h1:ts9/Zafxb9W9drZFTmQNMR7kLOyHyBw37NuowyiDork=
github.com/0x51-dev/upeg v0.1.1 h1:K3zXuTQHSCyh9rAZURt1G8vMw0GSJWy75dSVWpYLNqM=
github.com/0x51-dev/upeg v0.1.1/go.mod h1:ts9/Zafxb9W9drZFTmQNMR7kLOyHyBw37NuowyiDork=
122 changes: 122 additions & 0 deletions parser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package cfg

import (
"fmt"
"github.com/0x51-dev/upeg/parser"
"github.com/0x51-dev/upeg/parser/op"
)

// PEG rules describing the textual CFG syntax. They are listed bottom-up:
// single-symbol rules first, then the rules composed from them. Every rule is
// an op.Capture whose Name is what parseGrammar later switches on.
var (
	// epsilon captures the literal rune 'ε'.
	epsilon = op.Capture{Name: "Epsilon", Value: 'ε'}

	// terminal captures one lowercase rune in 'a'..'z' or one of the
	// bracket runes '(', ')', '[', ']'.
	terminal = op.Capture{
		Name: "Terminal",
		Value: op.Or{
			op.RuneRange{Min: 'a', Max: 'z'},
			'(', ')', '[', ']',
		},
	}

	// nonTerminal captures one uppercase rune in 'A'..'Z'.
	nonTerminal = op.Capture{Name: "NonTerminal", Value: op.RuneRange{Min: 'A', Max: 'Z'}}

	// expression captures the right-hand side of a single alternative:
	// either a run (op.OneOrMore) of terminals/non-terminals, or epsilon.
	expression = op.Capture{
		Name: "Expression",
		Value: op.Or{
			op.OneOrMore{Value: op.Or{terminal, nonTerminal}},
			epsilon,
		},
	}

	// productionRule captures one rule line: a non-terminal, an arrow
	// ('→' or "->"), an expression, any number of '|'-prefixed alternative
	// expressions, and a terminating op.EndOfLine.
	productionRule = op.Capture{
		Name: "ProductionRule",
		Value: op.And{
			nonTerminal,
			op.Or{'→', "->"},
			expression,
			op.ZeroOrMore{Value: op.And{'|', expression}},
			op.EndOfLine{},
		},
	}

	// grammar is the root rule: optional leading op.EndOfLine matches
	// followed by at least one production rule.
	grammar = op.Capture{
		Name: "CFG",
		Value: op.And{
			op.ZeroOrMore{Value: op.EndOfLine{}},
			op.OneOrMore{Value: productionRule},
		},
	}
)

// parseGrammar converts a parse tree rooted at a "CFG" node into a *CFG.
//
// Each child of the root must be a "ProductionRule" node whose first child is
// the left-hand-side non-terminal and whose remaining children are
// "Expression" nodes, one per alternative. Every alternative becomes its own
// Production. The left-hand side of the first rule is used as the start
// symbol.
//
// Variables are the non-terminals that appear on a left-hand side; terminals
// are the "Terminal" symbols seen in expressions. Both are handed to New in
// order of first appearance, so the result is deterministic for a given input
// (ranging over a map would yield a different order on every run).
//
// An error is returned when n is nil, when the tree does not have the expected
// shape, or when New rejects the collected grammar.
func parseGrammar(n *parser.Node) (*CFG, error) {
	if n == nil {
		return nil, fmt.Errorf("expected CFG, got nil node")
	}
	if n.Name != "CFG" {
		return nil, fmt.Errorf("expected CFG, got %s", n.Name)
	}

	var (
		start       Variable
		variables   []Variable
		terminals   []Terminal
		productions []Production
	)
	// The maps are used for de-duplication only; ordering lives in the slices.
	seenVariables := make(map[Variable]struct{})
	seenTerminals := make(map[Terminal]struct{})

	for _, rule := range n.Children() {
		if rule.Name != "ProductionRule" {
			return nil, fmt.Errorf("expected ProductionRule, got %s", rule.Name)
		}
		parts := rule.Children()
		if len(parts) < 2 {
			return nil, fmt.Errorf("expected at least 2 children, got %d", len(parts))
		}

		head := Variable(parts[0].Value())
		if _, ok := seenVariables[head]; !ok {
			if start == "" {
				// First non-terminal is the start symbol.
				start = head
			}
			seenVariables[head] = struct{}{}
			variables = append(variables, head)
		}

		for _, expr := range parts[1:] {
			if expr.Name != "Expression" {
				return nil, fmt.Errorf("expected Expression, got %s", expr.Name)
			}
			var body []Beta
			for _, sym := range expr.Children() {
				switch sym.Name {
				case "Terminal":
					t := Terminal(sym.Value())
					body = append(body, t)
					if _, ok := seenTerminals[t]; !ok {
						seenTerminals[t] = struct{}{}
						terminals = append(terminals, t)
					}
				case "NonTerminal":
					body = append(body, Variable(sym.Value()))
				case "Epsilon":
					body = append(body, Epsilon)
				default:
					return nil, fmt.Errorf("expected Terminal, NonTerminal, or Epsilon, got %s", sym.Name)
				}
			}
			productions = append(productions, Production{A: head, B: body})
		}
	}
	return New(variables, terminals, productions, start)
}

// Parse reads a grammar written in the textual production-rule syntax
// (for example "S → aSa\nS → ε\n") and returns the CFG it describes.
//
// The whole input must match the grammar rule followed by end of input.
// Errors from the underlying parser or from building the CFG are returned
// unchanged.
func Parse(input string) (*CFG, error) {
	runes := []rune(input)
	reader, err := parser.New(runes)
	if err != nil {
		return nil, err
	}

	// Register space and tab on the parser's ignore list
	// (presumably so they are skipped between symbols — confirm against upeg).
	ignored := []any{' ', '\t'}
	reader.SetIgnoreList(ignored)

	root := op.And{grammar, op.EOF{}}
	tree, err := reader.Parse(root)
	if err != nil {
		return nil, err
	}
	return parseGrammar(tree)
}
19 changes: 19 additions & 0 deletions parser_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package cfg

import (
"testing"
)

// TestParse checks that Parse accepts a set of well-formed grammars covering
// both arrow spellings ("->" and "→"), a leading blank line, epsilon
// productions, bracket terminals, and '|'-separated alternatives.
func TestParse(t *testing.T) {
	for _, rawGrammar := range []string{
		"A -> a\n",
		"A -> aA\nA -> ε\n",
		"\nS → aSa\nS → bSb\nS → ε\n",
		"S → SS\nS → ()\nS → (S)\nS → []\nS → [S]\n",
		"S → T | U\nT → VaT | VaV | TaV\nU → VbU | VbV | UbV\nV → aVbV | bVaV | ε\n",
	} {
		if _, err := Parse(rawGrammar); err != nil {
			// Include the input so a failure identifies which grammar broke.
			t.Errorf("Parse(%q): %v", rawGrammar, err)
		}
	}
}

0 comments on commit 2e44cbd

Please sign in to comment.