Skip to content

Commit

Permalink
spoke too soon, fix a minor bug
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed Sep 25, 2023
1 parent 14dc51d commit 68ffcfb
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,57 +8,46 @@ operator fun PForest.contains(v: Σᐩ) = PTree(v) in this
// Wraps one symbol as a single-branch list: a PTree rooted at `v` paired with a default PTree.
fun PSingleton(v: Σᐩ): List<Π2A<PTree>> {
  val leaf = PTree(v)
  val filler = PTree()
  return listOf(leaf to filler)
}

// Algebraic data type / polynomial functor for parse forests
// Each PTree carries a `root` symbol (ε by default) and a list of branches, where a branch is a
// pair of subtrees; choose() treats a tree with no branches as a leaf.
// NOTE(review): this span comes from a rendered diff, so the removed and the added form of a line
// sit back to back (two class headers, two leaf/flatMap pairs in choose(), two equals overrides).
// The removed form names the branch list `children`; the added form names it `branches`.
data class PTree(val root: Σᐩ = "ε", val children: List<Π2A<PTree>> = listOf()) {
class PTree(val root: Σᐩ = "ε", val branches: List<Π2A<PTree>> = listOf()) {
// Returns the set of all strings derivable from the given PTree
// A leaf yields its root, or the empty string when the root contains ε. Otherwise every branch
// contributes pairings of a left derivation with a right derivation, produced lazily.
// NOTE(review): `*` on two sequences is defined outside this view - presumably a Cartesian product.
fun choose(): Sequence<Σᐩ> =
if (children.isEmpty()) sequenceOf(if("ε" in root) "" else root)
else children.asSequence().flatMap { (l, r) ->
if (branches.isEmpty()) sequenceOf(if("ε" in root) "" else root)
else branches.asSequence().flatMap { (l, r) ->
// TODO: Use weighted choice mechanism
(l.choose() * r.choose()).map { (a, b) ->
// Join both halves with one space, omitting the separator when either half is empty
if (a == "") b else if (b == "") a else "$a $b"
}
}

// Only `root` feeds the hash, so trees with the same root hash alike whatever their branches.
override fun hashCode(): Int = root.hashCode()
// NOTE(review): the second (added) equals below compares hash codes only and never checks that
// `other` is a PTree, so any object whose hash code matches root.hashCode() compares equal.
// The first (removed) equals compared the roots of two PTrees. Confirm the change is intended.
override fun equals(other: Any?) = other is PTree && root == other.root
override fun equals(other: Any?) = hashCode() == other.hashCode()
}

// Lazily computes all syntactically valid strings compatible with the given template
fun CFG.solveSeq(s: Σᐩ): Sequence<Σᐩ> {
  // Split the template on whitespace, then hand off to the token-list overload.
  val tokens = s.tokenizeByWhitespace()
  return solveSeq(tokens)
}

// Runs solvePTreeFPSeq on the token list. Any Exception is printed and swallowed,
// in which case (or when the solver yields null) an empty sequence is returned.
fun CFG.solveSeq(s: List<Σᐩ>): Sequence<Σᐩ> {
  val solved = try {
    solvePTreeFPSeq(s)
  } catch (failure: Exception) {
    failure.printStackTrace()
    null
  }
  return solved ?: sequenceOf()
}

// Drives the forest matrix to its fixpoint, expands it with toFullMatrix() and returns the
// last item of element [0]. When no matrix is supplied, one is built from `tokens` using
// initPForestMatrix with pforestAlgebra().
fun CFG.solvePTreeFPSeq(
  tokens: List<Σᐩ>,
  utMatrix: UTMatrix<PForest> = initPForestMatrix(tokens, pforestAlgebra()),
) = utMatrix.seekFixpoint().toFullMatrix().let { full -> full[0].last() }
fun CFG.solveSeq(s: Σᐩ): Sequence<Σᐩ> {
  val tokens = s.tokenizeByWhitespace()
  // Element [0] of the last diagonal of the fixpoint matrix.
  val corner = initPForestMat(tokens).seekFixpoint().diagonals.last()[0]
  // Only a tree rooted at the start symbol yields results; otherwise nothing is derivable.
  val startTree = corner.firstOrNull { tree -> tree.root == START_SYMBOL }
  return startTree?.choose() ?: emptySequence()
}

// Builds the initial UTMatrix of parse forests, with one forest per input token.
// A concrete token maps every nonterminal found under bimap[listOf(token)] to PSingleton(token);
// a HOLE_MARKER maps every unit nonterminal to the singletons of all entries in bimap.UNITS[nt],
// or to an empty list when that lookup is null. Each (nonterminal, branches) pair becomes a PTree
// and the trees for one token are collected into a set.
// NOTE(review): this span comes from a rendered diff. The three-line header taking an `algebra`
// parameter, the hole branch ending in toSet()/toList(), and `algebra = algebra` are the removed
// form; the one-line initPForestMat header, the hole branch ending in flatten(), and the inline
// Ring.of(...) (nil = empty set, plus = merge, times = joinSeq) are the added form.
fun CFG.initPForestMatrix(
tokens: List<Σᐩ>,
algebra: Ring<PForest>
): UTMatrix<PForest> =
fun CFG.initPForestMat(tokens: List<Σᐩ>): UTMatrix<PForest> =
UTMatrix(
ts = tokens.map { token ->
(if (token != HOLE_MARKER) bimap[listOf(token)] else unitNonterminals)
.associateWith { nt ->
if (token != HOLE_MARKER) PSingleton(token)
else bimap.UNITS[nt]?.map { PSingleton(it) }?.flatten()?.toSet()?.toList() ?: listOf()
else bimap.UNITS[nt]?.map { PSingleton(it) }?.flatten() ?: listOf()
}.map { (k, v) -> PTree(k, v) }.toSet()
}.toTypedArray(),
algebra = algebra
algebra = Ring.of(
nil = emptySet(),
plus = { x, y -> merge(x, y) },
times = { x, y -> joinSeq(x, y) },
)
)

// Maintains a sorted list of nonterminal roots and their leaves
// Ring over parse forests: the empty set is the zero element, addition is set union,
// and multiplication delegates to joinSeq.
fun CFG.pforestAlgebra(): Ring<PForest> =
  Ring.of(
    nil = emptySet(),
    plus = { left, right -> left.union(right) },
    times = { left, right -> joinSeq(left, right) },
  )
// Combines two forests: trees sharing a root collapse into one PTree whose branches are the
// concatenation of the grouped trees' branches (those from X before those from Z).
fun merge(X: PForest, Z: PForest): PForest {
  val treesByRoot = (X.toList() + Z).groupBy { tree -> tree.root }
  return treesByRoot
    .map { (root, trees) -> PTree(root, trees.flatMap { it.branches }) }
    .toSet()
}

// X ⊗ Z := { w | <x, z> ∈ X × Z, (w -> xz) ∈ P }
fun CFG.joinSeq(X: PForest, Z: PForest): PForest =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ private fun List<Σᐩ>.pad3(): List<Σᐩ> =

// Tokenizes the string on whitespace and checks the resulting token list.
fun CFG.isValid(str: Σᐩ): 𝔹 {
  val tokens = str.tokenizeByWhitespace()
  return isValid(tokens)
}
// Decides whether the token list is accepted: pads it with pad3(), builds the matrix with
// initialUTBMatrix, runs seekFixpoint(), takes element [0] of the last diagonal and reads the
// entry stored at bindex[START_SYMBOL].
// NOTE(review): this span comes from a rendered diff. The chain split across `.diagonals` and
// `.last().first()` is the removed form; the single-line chain ending in `.last()[0]` is the
// added form. The commented-out `.also` lines are debugging printouts.
fun CFG.isValid(str: List<Σᐩ>): 𝔹 =
initialUTBMatrix(str.pad3()).seekFixpoint().diagonals
// .also { it.forEachIndexed { r, d -> d.forEachIndexed { i, it -> println("$r, $i: ${toNTSet(it)}") } } }
.last().first()//.also { println("Last: ${it.joinToString(",") {if (it) "1" else "0"}}") }
initialUTBMatrix(str.pad3()).seekFixpoint().diagonals.last()[0]
//.also { it.forEachIndexed { r, d -> d.forEachIndexed { i, it -> println("$r, $i: ${toNTSet(it)}") } } }
//.also { println("Last: ${it.joinToString(",") {if (it) "1" else "0"}}") }
.let { corner -> corner[bindex[START_SYMBOL]] }

// Tokenizes the string, solves the fixed point and returns the last item of element [0].
fun CFG.parseForest(str: Σᐩ): Forest {
  val tokens = str.tokenizeByWhitespace()
  return solveFixedpoint(tokens)[0].last()
}
Expand All @@ -59,7 +59,7 @@ fun CFG.solveFixedpoint(
// a sequence of partial trees ordered by the length of the substring that can be parsed.
// The first component holds the START_SYMBOL-rooted trees from element [0] of the last diagonal,
// denormalized and collected into a set; the second holds every tree of every diagonal,
// denormalized, walking the diagonals in reverse order.
// NOTE(review): this span comes from a rendered diff. `it.first()[0]` is the removed form and
// `it[0][0]` is the added form of the same line.
fun CFG.parseWithStubs(s: Σᐩ): Pair<Forest, List<Tree>> =
solveFixedpoint(s.tokenizeByWhitespace()).toUTMatrix().diagonals.asReversed().let {
it.first()[0].filter { it.root == START_SYMBOL }.map { it.denormalize() }.toSet() to
it[0][0].filter { it.root == START_SYMBOL }.map { it.denormalize() }.toSet() to
it.flatten().flatten().map { it.denormalize() }
}

Expand Down Expand Up @@ -295,7 +295,7 @@ fun List<Σᐩ>.solve(
// Generates candidate strings by filling every HOLE_MARKER in the receiver token list.
// Each sample from MDSamplerWithoutReplacement(fillers, number of holes) is consumed left to
// right: a hole takes the next sample element, any other token is copied as is. Tokens are
// joined with single spaces, then every ε followed by a space is removed and the result trimmed.
// NOTE(review): this span comes from a rendered diff. `b.first()` is the removed form and
// `b[0]` is the added form of the same line.
fun List<Σᐩ>.genCandidates(CFG: CFG, fillers: Set<Σᐩ> = CFG.terminals): Sequence<Σᐩ> =
MDSamplerWithoutReplacement(fillers, count { it == HOLE_MARKER }).map {
fold("" to it) { (a, b), c ->
if (c == HOLE_MARKER) (a + " " + b.first()) to b.drop(1) else ("$a $c") to b
if (c == HOLE_MARKER) (a + " " + b[0]) to b.drop(1) else ("$a $c") to b
}.first.replace("ε ", "").trim()
}

Expand Down

0 comments on commit 68ffcfb

Please sign in to comment.