Skip to content

Commit

Permalink
debug bijection
Browse files Browse the repository at this point in the history
  • Loading branch information
breandan committed May 22, 2024
1 parent 8faf185 commit de5b097
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 17 deletions.
2 changes: 1 addition & 1 deletion build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import kotlin.time.DurationUnit.MILLISECONDS
plugins {
signing
`maven-publish`
kotlin("multiplatform") version "2.0.0-RC3"
kotlin("multiplatform") version "2.0.0"
// kotlin("jupyter.api") version "0.11.0-225"
id("com.github.ben-manes.versions") version "0.51.0"
id("io.github.gradle-nexus.publish-plugin") version "2.0.0-rc-2"
Expand Down
2 changes: 1 addition & 1 deletion gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.7-bin.zip
distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-rc-1-bin.zip
networkTimeout=10000
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
Binary file modified latex/splash2024/splash.pdf
Binary file not shown.
6 changes: 3 additions & 3 deletions latex/splash2024/splash.tex
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@
\node (start) [startstop, draw=none];
\node (pro1) [process, below of=start, yshift=-0.3cm] {$G_\cap \leftarrow G\cap\Delta(\err\sigma, d)$};
\node [above=0.07cm of pro1] {(\S~\ref{sec:lev_bh})};
\node (pcfg) [io2, left of=pro1, xshift=-3cm] {[PHO]CFG};
\node (pcfg) [io2, left of=pro1, xshift=-3cm] {[P]CFG};
\node [above=0.07cm of pcfg, xshift=1cm] {(\S~\ref{sec:prelim})};
\node (lnfa) [io, right of=pro1, xshift=3cm] {L-NFA};
\node [above=0.07cm of lnfa, xshift=1cm] {(\S~\ref{sec:lev_nfa})};
Expand Down Expand Up @@ -806,7 +806,7 @@
L(p) = 1 + p L(p) \phantom{addspace} P(a) = \Sigma + V + V L\big(V^2P(a)^2\big)
\end{equation}

Depicted in Fig.~\ref{fig:ptree} is a partial $\mathbb{T}_2$, where red nodes are \texttt{root}s and blue nodes are \texttt{children}. The shape of type $\mathbb{T}_2$ is congruent with an acyclic CFG in Chomsky Normal Form, i.e., $\mathbb{T}_2\cong\mathcal{G}'$, so assuming the CFG recognizes a finite language, as the case for $G_\cap'$, we can translate it directly. If the language is infinite, we slice the CFL, $\mathcal{L}(G)\cap \Sigma^n$, and compute the fixpoint for each slice.
Depicted in Fig.~\ref{fig:ptree} is a partial $\mathbb{T}_2$, where red nodes are \texttt{root}s and blue nodes are \texttt{children}. The shape of type $\mathbb{T}_2$ is congruent with an acyclic CFG in Chomsky Normal Form, i.e., $\mathbb{T}_2\cong\mathcal{G}'$, so assuming the CFG recognizes a finite language, as is the case for $G_\cap'$, we can translate it directly. If the language is infinite, we slice the CFL, $\mathcal{L}(G)\cap \Sigma^n$, and compute the fixpoint for each slice.

Given a porous string $\sigma: \underline\Sigma^n$ representing the slice, we can construct $\mathbb{T}_2$ from the bottom up, and read off structures from the top down. We construct the first upper diagonal $\hat\sigma_r = \Lambda(\sigma_r)$ as follows:

Expand Down Expand Up @@ -1080,7 +1080,7 @@
\edge {x} {k,l}
\end{tikzpicture}}\end{minipage}\begin{minipage}{5cm}\begin{align*} P(X\rightarrow KL \mid W\rightarrow XZ) &= P\big((\cdot x \cdot) \rightarrow (\cdot k \cdot)(\cdot l \cdot) \mid (\cdot w \cdot) \rightarrow (\cdot x \cdot)(\cdot z \cdot)\big) \\ &= P(K=k, L=l \mid W, X, Z)\\ &= \frac{C(K=k, L=l \mid W, X, Z)}{\sum_{k', l'}C(K=k', L=l' \mid W, X, Z)} \end{align*}\end{minipage}\end{table}

Since the parent can be a left- or right-sibling, we also consider a symmetric case:
Since the parent can be a left- or right-sibling, we should also consider the symmetric case:

\begin{table}[h!]
\begin{minipage}{3cm}\resizebox{.6\textwidth}{!}{\begin{tikzpicture}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ class PTree(val root: String = ".ε", val branches: List<Π2A<PTree>> = listOf()
.reduce { acc, it -> acc + it }
}

/**
 * Consecutive index intervals, one per entry of `branches`, in branch order.
 *
 * Each branch is given a width of `l.totalTrees + r.totalTrees`; the widths are accumulated
 * starting at zero and every adjacent pair of running totals `(a, b)` becomes the interval
 * `a to b - 1` (upper bound inclusive). A node without branches yields the single interval
 * `ZERO to ONE`.
 *
 * NOTE(review): other code in this class sizes a branch as `l.totalTrees * r.totalTrees`;
 * confirm whether the sum used here is intended.
 * NOTE(review): the leaf interval `ZERO to ONE` does not follow the inclusive `b - 1`
 * convention of the non-leaf case; confirm.
 */
val ranges: List<Pair<BigInteger, BigInteger>> by lazy {
  if (branches.isEmpty()) {
    listOf(BigInteger.ZERO to BigInteger.ONE)
  } else {
    val widths = branches.map { (l, r) -> l.totalTrees + r.totalTrees }
    // Running totals: [0, w0, w0 + w1, ...]; one more element than there are branches.
    val boundaries = widths.runningFold(BigInteger.ZERO) { total, width -> total + width }
    boundaries.zipWithNext { a, b -> a to b - 1 }
  }
}

// e.g., if we want to prioritize shorter strings we can sort by total epsilons
val numEpsilons: BigInteger by lazy {
if (branches.isEmpty()) if (root == "ε") BigInteger.ONE else BigInteger.ZERO
Expand All @@ -65,7 +72,7 @@ class PTree(val root: String = ".ε", val branches: List<Π2A<PTree>> = listOf()
either: (T?, T?) -> T?,
unit: (PTree) -> T?
): T? =
if (branches.isEmpty()) if("ε" in root) null else unit(this)
if (branches.isEmpty()) if ("ε" in root) null else unit(this)
else branches.map { (l, r) ->
both(l.propagator(both, either, unit), r.propagator(both, either, unit))
}.reduce { acc, t -> either(acc, t) }
Expand All @@ -88,25 +95,51 @@ class PTree(val root: String = ".ε", val branches: List<Π2A<PTree>> = listOf()

fun choose(): Sequence<String> = choice.asSequence()

/**
 * Decodes the index [i] into the string of one tree below this node.
 *
 * The branch `(l, r)` is treated as holding `l.totalTrees * r.totalTrees` trees. Branches are
 * laid out back to back in `branches` order; [i] is first wrapped into the combined size of
 * all branches, the branch containing it is located by subtracting branch sizes, and the
 * offset inside that branch is split into a left and a right index by dividing by
 * `r.totalTrees` (quotient goes left, remainder goes right). Each child therefore receives
 * an index below its own `totalTrees`, and distinct offsets within a branch give distinct
 * (left, right) index pairs.
 *
 * NOTE(review): assumes `mod` yields a non-negative result for the indices passed in and
 * that `divrem` returns (quotient, remainder), as its other uses in this class suggest.
 *
 * @param i index of the tree to decode; values beyond the combined branch size wrap around
 * @return `epsStr` for a node without branches; otherwise the decoded left and right strings
 *   joined by a single space, with an empty side omitted
 */
private fun newDecoder(i: BigInteger): String {
  if (branches.isEmpty()) return epsStr

  val sizes = branches.map { (l, r) -> l.totalTrees * r.totalTrees }
  var offset = i mod sizes.reduce { acc, size -> acc + size }

  // Walk the cumulative layout; stop at the last branch so `t` always stays in bounds.
  var t = 0
  while (t < sizes.size - 1 && offset >= sizes[t]) {
    offset = offset - sizes[t]
    t++
  }

  val (l, r) = branches[t]
  // Mixed-radix split of the in-branch offset: offset = iLeft * r.totalTrees + iRight.
  val (iLeft, iRight) = offset.divrem(r.totalTrees)

  val left = l.newDecoder(iLeft)
  val right = r.newDecoder(iRight)
  return when {
    left.isEmpty() -> right
    right.isEmpty() -> left
    else -> "$left $right"
  }
}

// Average time: 436.96ms, total time 43696.959ms (testRandomCFG)
/**
 * Decodes the index [i] into a string, consuming part of the index at every internal node.
 *
 * At a node with branches, [i] is divided by the number of branches: the remainder selects an
 * entry of `shuffledBranches` and the quotient is passed to the left child; whatever the left
 * child leaves over is passed to the right child.
 *
 * @param i the index (or remaining part of an index) to decode
 * @return the decoded string paired with the part of the index that was not consumed; a node
 *   without branches returns `epsStr` and [i] unchanged. Left and right strings are joined by
 *   a single space, and an empty side is omitted.
 */
private fun decodeString(i: BigInteger): Pair<String, BigInteger> {
  if (branches.isEmpty()) return epsStr to i
  val (quotient1, remainder) = i.divrem(branches.size.toBigInteger())
  val (lb, rb) = shuffledBranches[remainder.intValue()]
  val (l, quotient2) = lb.decodeString(quotient1)
  val (r, quotient3) = rb.decodeString(quotient2)
  val concat = when {
    l.isEmpty() -> r
    r.isEmpty() -> l
    else -> "$l $r"
  }
  return concat to quotient3
}

// Average time: 328.99ms, total time 32899.708ms (testRandomCFG)
/**
 * `Long`-based counterpart of the `BigInteger` string decoder.
 *
 * At a node with branches, [i] is divided by the number of branches: the remainder selects an
 * entry of `shuffledBranches` and the quotient is passed to the left child; whatever the left
 * child leaves over is passed to the right child.
 *
 * @param i the index (or remaining part of an index) to decode
 * @return the decoded string paired with the part of the index that was not consumed; a node
 *   without branches returns `epsStr` and [i] unchanged. Left and right strings are joined by
 *   a single space, and an empty side is omitted.
 */
private fun decodeStringFast(i: Long): Pair<String, Long> {
  if (branches.isEmpty()) return epsStr to i
  val branchCount = branches.size.toLong()
  val quotient1 = i / branchCount
  val remainder = i % branchCount
  val (lb, rb) = shuffledBranches[remainder.toInt()]
  val (l, quotient2) = lb.decodeStringFast(quotient1)
  val (r, quotient3) = rb.decodeStringFast(quotient2)
  val concat = when {
    l.isEmpty() -> r
    r.isEmpty() -> l
    else -> "$l $r"
  }
  return concat to quotient3
}

Expand All @@ -130,6 +163,7 @@ class PTree(val root: String = ".ε", val branches: List<Π2A<PTree>> = listOf()
sequence {
var i = BigInteger.ZERO
while (i < 9 * totalTrees) yield(decodeString(i++ * stride + offset).first)
// while (i < 9 * totalTrees) yield(newDecoder(i++ * stride + offset))
}

fun sampleStrWithPCFG5(pcfgTable: Map<Int, Int>): Sequence<String> =
Expand Down
45 changes: 38 additions & 7 deletions src/jvmTest/kotlin/ai/hypergraph/kaliningraph/automata/WFSATest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,10 @@ class WFSATest {
val toRepair = "NAME : NEWLINE NAME = STRING NEWLINE NAME = NAME . NAME ( STRING ) NEWLINE"
val radius = 1
val pt = Grammars.seq2parsePythonCFG.makeLevPTree(toRepair, radius, shortS2PParikhMap)
val repairs = pt.sampleStrWithoutReplacement().distinct().take(100).toSet()
println("Found ${repairs.size} repairs by enumerating PTree")
println(pt.totalTrees.toString())
val maxResults = 10_000
val repairs = pt.sampleStrWithoutReplacement()
.distinct().take(maxResults).toSet()
measureTimedValue {
pt.propagator<Automaton<String, Double>>(
both = { a, b -> if (a == null) b else if (b == null) a else Concatenation(a, b) },
Expand All @@ -85,17 +87,46 @@ class WFSATest {
addTransition(s1, s2, a.root, 1.0)
}
}
)?.also { println("\n" + Operations.determinizeER(it).toDot().alsoCopy() + "\n") }
.also { println("Total: ${Automata.transitions(it).size} arcs, ${Automata.states(it).size}") }
.let { Automata.bestStrings(it, 1000).map { it.label.joinToString(" ") }.toSet() }
)
// ?.also { println("\n" + Operations.determinizeER(it).toDot().alsoCopy() + "\n") }
// .also { println("Total: ${Automata.transitions(it).size} arcs, ${Automata.states(it).size}") }
.let { Automata.bestStrings(it, maxResults).map { it.label.joinToString(" ") }.toSet() }
}.also {
println("Found ${it.value.size} repairs by decoding WFSA")
assertEquals(it.value, repairs)
println("Found ${it.value.size} unique repairs by decoding WFSA")
println("Found ${repairs.size} unique repairs by enumerating PTree")

// // Print side by side comparison of repairs
// repairs.sorted().forEach {
// val a = it
// val b = if (it in repairs) it else ""
// val colorA = levenshteinAlign(toRepair, a).paintANSIColors()
// val colorB = if (b.isEmpty()) "" else levenshteinAlign(toRepair, b).paintANSIColors()
// println("$colorA\n$colorB\n")
// }

assertEquals(it.value.size, repairs.size)
it.value.forEach {
println(levenshteinAlign(toRepair, it).paintANSIColors())
assertTrue(levenshtein(toRepair, it) <= radius)
assertTrue(it in Grammars.seq2parsePythonCFG.language)
}
}.also { println("Decoding ${it.value.size} repairs took ${it.duration}") }
}

/*
./gradlew jvmTest --tests "ai.hypergraph.kaliningraph.automata.WFSATest.testBijection"
*/
@Test
fun testBijection() {
  // Builds the PTree for a fixed broken snippet, draws up to `maxResults` strings from it and
  // reports how many were drawn versus how many of those are distinct. Nothing is asserted.
  val toRepair = "NAME : NEWLINE NAME = STRING NEWLINE NAME = NAME ( STRING ) NEWLINE"
  val radius = 2
  val maxResults = 10_000

  val pt = Grammars.seq2parsePythonCFG.makeLevPTree(toRepair, radius, shortS2PParikhMap)
  println(pt.totalTrees.toString())

  val sampled = pt.sampleStrWithoutReplacement()
    .take(maxResults)
    .toList()
    .also { println("Found ${it.size} total repairs by enumerating PTree") }

  // Open question from the author: a true bijection should produce no duplicates, yet the
  // unique count printed below comes out smaller than the total above.
  val uniqueCount = sampled.toSet().size
  println("Found $uniqueCount unique repairs by enumerating PTree")
}
}

0 comments on commit de5b097

Please sign in to comment.