Skip to content

Commit

Permalink
Levenshtein-CFL intersection using Bar-Hillel construction
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed Oct 11, 2023
1 parent c20e49e commit 7df6f82
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package ai.hypergraph.kaliningraph.parsing
import ai.hypergraph.kaliningraph.types.*

/**
 * Intersects this automaton with the grammar [cfg].
 *
 * This is the argument-flipped form of the `intersect` overload whose receiver
 * is the grammar: it does no work of its own and simply delegates to it.
 */
infix fun FSA.intersect(cfg: CFG) = cfg intersect this
// http://www.cs.umd.edu/~gasarch/BLOGPAPERS/cfg.pdf
// http://www.cs.umd.edu/~gasarch/BLOGPAPERS/cfg.pdf#page=2
// https://browse.arxiv.org/pdf/2209.06809.pdf#page=5

infix fun CFG.intersect(fsa: FSA): CFG {
Expand All @@ -19,7 +19,7 @@ infix fun CFG.intersect(fsa: FSA): CFG {
nonterminalProductions.map {
val triples = fsa.states * fsa.states * fsa.states
val (A, B, C) = it.π1 to it.π2[0] to it.π2[1]
triples.map { (p, q, r) -> "[$p,$A,$r] -> [$q,$B,$q] [$q,$C,$r]" }
triples.map { (p, q, r) -> "[$p,$A,$r] -> [$p,$B,$q] [$q,$C,$r]" }
}.flatten()

// For every production A → σ in P, for every (p, σ, q) ∈ Q × Σ × Q
Expand All @@ -32,7 +32,8 @@ infix fun CFG.intersect(fsa: FSA): CFG {

return (initFinal + transits + binaryProds + unitProds).joinToString("\n")
.parseCFG(normalize = false)
// .also { println(it.pretty) }
// .removeVestigalProductions()
.removeVestigalProductions()
.normalForm.noNonterminalStubs
.also { println(it.pretty) }
// .also { println(it.size) }
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ fun CFG.isValid(str: List<Σᐩ>): 𝔹 =
//.also { println("Last: ${it.joinToString(",") {if (it) "1" else "0"}}") }
.let { corner -> corner[bindex[START_SYMBOL]] }

/**
 * Tokenizes [str] on whitespace, runs [solveFixedpoint] on the tokens, and
 * returns the distinct `root` values found in the last entry of row 0 of the
 * resulting table.
 *
 * NOTE(review): presumably that entry is the cell covering the whole input,
 * i.e. the symbols that can derive the full string — confirm against
 * solveFixedpoint.
 */
fun CFG.corner(str: Σᐩ) =
  solveFixedpoint(str.tokenizeByWhitespace())[0]
    .last()
    .map { tree -> tree.root }
    .toSet()
// START_SYMBOL in solveFixedpoint(str.tokenizeByWhitespace())[0].last().map { it.root }.toSet()
/**
 * Tokenizes [str] on whitespace, solves the fixed point for those tokens, and
 * returns the last entry of row 0 of the resulting table.
 */
fun CFG.parseForest(str: Σᐩ): Forest {
  val tokens = str.tokenizeByWhitespace()
  val table = solveFixedpoint(tokens)
  return table[0].last()
}
/**
 * Tokenizes [str] on whitespace and returns the full table produced by
 * [solveFixedpoint] for those tokens.
 */
fun CFG.parseTable(str: Σᐩ): TreeMatrix {
  val tokens = str.tokenizeByWhitespace()
  return solveFixedpoint(tokens)
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package ai.hypergraph.kaliningraph.parsing

import ai.hypergraph.kaliningraph.levenshtein
import kotlin.test.*
import kotlin.time.*

Expand Down Expand Up @@ -223,21 +224,33 @@ class BarHillelTest {
val simpleCFG = """
START -> E
O -> + | *
E -> N O N
E -> N O N | E O N
N -> 1 | 2
""".parseCFG().noEpsilonOrNonterminalStubs

val levFSA = makeLevFSA("1 + 1", 1, simpleCFG.terminals)
// println(levFSA.graph.toDot())
val origStr = "1 + 1"
val levFSA = makeLevFSA(origStr, 2, simpleCFG.terminals)

// println(levFSA.Q.size)
val levCFG = levFSA.intersect(simpleCFG)

val levCFG = (levFSA.intersect(simpleCFG))//.also { print(it.pretty) }
// println(levCFG.graph.toDot())
fun testLevenshteinAcceptance(s: Σᐩ) {
assertTrue(levFSA.recognizes(s))
assertTrue(s in simpleCFG.language)
assertTrue(s in levCFG.language)
}

val testStr = "1 * 1"
assertTrue(levFSA.recognizes(testStr))
assertTrue(testStr in simpleCFG.language)
// println(levCFG.corner(testStr))
val neighbor = "1 * 2"
assertEquals(2, levenshtein(origStr, neighbor))
testLevenshteinAcceptance(neighbor)

val foreign = "1 + 1 + 1"
testLevenshteinAcceptance(foreign)

val testFail = "2 * 2"
assertFalse(testFail in levCFG.language)

val template = List(5) { "_" }.joinToString(" ")
val solutions = levCFG.solveSeq(template).toList().onEach { println(it) }
println("Found ${solutions.size} solutions within Levenshtein distance 2 of \"$origStr\"")
}
}

0 comments on commit 7df6f82

Please sign in to comment.