Skip to content

Commit

Permalink
bring back and fix algorithms from odb incl. tests
Browse files Browse the repository at this point in the history
  • Loading branch information
mpollmeier committed Apr 25, 2024
1 parent 44f3a9b commit ce84d7f
Show file tree
Hide file tree
Showing 8 changed files with 404 additions and 1 deletion.
36 changes: 36 additions & 0 deletions core/src/main/scala/flatgraph/algorithm/DependencySequencer.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package flatgraph.algorithm

import scala.annotation.tailrec

object DependencySequencer {

  /** Arrange a set of nodes of a directed acyclic graph (DAG) into a sequence of groups, such that all parents of a node are part of an
    * earlier group than the node itself. Sample use case: concurrent task processing: given a set of tasks, determine which ones can be
    * executed in parallel (same group), and which ones need to run in sequence (later groups).
    *
    * Algorithm: variant of Kahn's algorithm for topological sort:
    *   1. among the remaining nodes, find all those whose parents (e.g. task dependencies) have all been visited already
    *   1. append them as one group to the result sequence and mark them as visited
    *   1. repeat for the remainder of nodes
    *
    * see https://en.wikipedia.org/wiki/Topological_sorting#Kahn%27s_algorithm
    *
    * @param nodes
    *   the nodes to be sequenced; parents are looked up via the implicit [[GetParents]] instance
    * @return
    *   the groups in dependency order; empty if `nodes` is empty
    * @throws java.lang.AssertionError
    *   if given nodes have cyclic dependencies. Note that the same error is raised if a node has a parent that is not part of `nodes`,
    *   because such a node can never become ready.
    */
  def apply[A: GetParents](nodes: Set[A]): Seq[Set[A]] =
    apply0(nodes, Seq.empty, Set.empty)

  /** One round per recursion: split off the nodes that are ready, then continue with the blocked ones. */
  @tailrec
  private def apply0[A](pending: Set[A], sequenced: Seq[Set[A]], done: Set[A])(implicit parentsOf: GetParents[A]): Seq[Set[A]] =
    if (pending.isEmpty) sequenced
    else {
      // a node is ready as soon as every one of its parents has been emitted in an earlier round
      val (ready, blocked) = pending.partition(node => parentsOf(node).forall(done.contains))
      // no progress in this round means that the pending nodes can never become ready
      assert(blocked.size < pending.size, s"given set of nodes is not a directed acyclic graph (DAG): ${pending ++ sequenced.flatten}")
      apply0(blocked, sequenced :+ ready, done ++ ready)
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package flatgraph.algorithm

/** Find the lowest common ancestor(s)
  *
  *   1. for each relevant node, find all of its (transitive) ancestors
  *   1. create the intersection of all of those sets: the common ancestors
  *   1. the LCA are those common ancestors that are not an ancestor of any common ancestor, i.e. that do not have any descendants in that
  *      set
  *
  * based on https://www.baeldung.com/cs/lowest-common-ancestor-acyclic-graph
  */
object LowestCommonAncestors {

  /** @param nodes
    *   the nodes to find the lowest common ancestors for
    * @param parents
    *   function to look up the direct parents of a node
    * @return
    *   `nodes` itself if it contains at most one element, otherwise the lowest common ancestors, which may be empty. For nodes that are
    *   part of a cycle every member of the cycle is an ancestor of every other member, hence none of them is considered 'lowest'.
    */
  def apply[A](nodes: Set[A])(parents: A => Set[A]): Set[A] = {

    /** All nodes that can be reached from any of the `start` nodes by following `parents` one or more times.
      *
      * Implemented as a breadth-first expansion that visits every ancestor only once: this keeps the runtime linear in the size of the
      * traversed subgraph (a naive per-path recursion is exponential for graphs with many diamonds), is stack-safe, and terminates on
      * cyclic input.
      */
    def ancestors(start: Set[A]): Set[A] = {
      @scala.annotation.tailrec
      def loop(frontier: Set[A], visited: Set[A]): Set[A] = {
        val unvisited = frontier -- visited
        if (unvisited.isEmpty) visited
        else loop(unvisited.flatMap(parents), visited ++ unvisited)
      }
      // start from the parents rather than the nodes themselves: a node is only its own ancestor if it's part of a cycle
      loop(start.flatMap(parents), Set.empty)
    }

    if (nodes.size <= 1) {
      nodes
    } else {
      // `nodes` is known to be non-empty here, so `reduce` is safe
      val commonAncestors = nodes.iterator.map(node => ancestors(Set(node))).reduce(_ intersect _)
      // a common ancestor is 'lowest' unless it is an ancestor of some common ancestor:
      // one multi-source traversal finds all of those at once
      commonAncestors -- ancestors(commonAncestors)
    }
  }

}
68 changes: 68 additions & 0 deletions core/src/main/scala/flatgraph/algorithm/PathFinder.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package flatgraph.algorithm

import flatgraph.Edge.Direction
import flatgraph.GNode
import flatgraph.traversal.Language.*

/** Find paths between two nodes. Each traversal step uses `both`, i.e. presumably follows edges regardless of their direction
* (NOTE(review): confirm against the traversal language) - which matches `edgesBetween` below looking at outgoing as well as incoming
* edges.
*/
object PathFinder {
/** Find the paths that lead from `nodeA` to `nodeB`.
*
* @param nodeA
*   the node to start from
* @param nodeB
*   the destination node
* @param maxDepth
*   handed to the repeat behaviour's `maxDepth` if greater than -1; with the default of -1 no `maxDepth` is configured
* @return
*   one [[Path]] per path emitted by the traversal. If `nodeA == nodeB`, a single path that only consists of that node.
*/
def apply(nodeA: GNode, nodeB: GNode, maxDepth: Int = -1): Seq[Path] = {
if (nodeA == nodeB) Seq(Path(Seq(nodeA)))
else {
Iterator
.single(nodeA)
.enablePathTracking
.repeat(_.both) { initialBehaviour =>
val behaviour = initialBehaviour.dedup // no cycles
.until(_.is(nodeB)) // don't continue on a given path if we've reached our destination
// a negative maxDepth means: do not configure a depth limit
if (maxDepth > -1) behaviour.maxDepth(maxDepth)
else behaviour
}
.is(nodeB) // we only care about the paths that lead to our destination
.path
.cast[Seq[GNode]]
.map(Path.apply)
.toSeq
}
}

/** The nodes of a path, in traversal order. */
case class Path(nodes: Seq[GNode]) {
/** Enrich this path with the edges between each pair of consecutive nodes.
*
* The result alternates between node and edge entries: the first node, then for every following node an entry for the edge(s) leading
* to it and the node itself. An empty path results in an empty list of elements.
*
* @throws java.lang.AssertionError
*   if two consecutive nodes of this path are not connected by any edge
*/
def withEdges: PathWithEdges = {
val elements = Seq.newBuilder[PathEntry]
nodes.headOption.foreach { firstElement =>
elements.addOne(NodeEntry(firstElement))
}

for {
// windows that do not have exactly two elements (e.g. for a single-node path) do not match the pattern and are skipped
case Seq(nodeA, nodeB) <- nodes.sliding(2)
edgesBetweenAsPathEntry: PathEntry =
edgesBetween(nodeA, nodeB) match {
case Nil =>
throw new AssertionError(s"no edges between nodes $nodeA and $nodeB - this looks like a bug in PathFinder")
// exactly one edge: use it directly rather than wrapping it
case Seq(edgeEntry) => edgeEntry
case multipleEdges => EdgeEntries(multipleEdges)
}
} {
elements.addOne(edgesBetweenAsPathEntry)
elements.addOne(NodeEntry(nodeB))
}

PathWithEdges(elements.result())
}
}

/** All edges that directly connect the two given nodes, from the perspective of `nodeA`: edges from `nodeA` to `nodeB` are reported as
* `Outgoing`, edges from `nodeB` to `nodeA` as `Incoming`. Outgoing edges are listed first.
*/
private def edgesBetween(nodeA: GNode, nodeB: GNode): Seq[EdgeEntry] = {
val outEdges = nodeA.outE.filter(_.dst == nodeB).map(edge => EdgeEntry(Direction.Outgoing, edge.label))
val inEdges = nodeA.inE.filter(_.src == nodeB).map(edge => EdgeEntry(Direction.Incoming, edge.label))
outEdges.to(Seq) ++ inEdges.to(Seq)
}

/** A path including the edges between its nodes, as created by `Path.withEdges`. */
case class PathWithEdges(elements: Seq[PathEntry])
/** An element of a [[PathWithEdges]]: either a node, a single edge, or multiple edges between the same two nodes. */
sealed trait PathEntry
/** A node on the path; `label` and `id` are delegated to the wrapped node. */
case class NodeEntry(node: GNode) extends PathEntry {
def label: String = node.label()
def id: Long = node.id()
}
/** Multiple edges that connect the same two consecutive nodes of a path. */
case class EdgeEntries(edgeEntries: Seq[EdgeEntry]) extends PathEntry
/** A single edge, described by its label and its direction relative to the preceding node of the path. */
case class EdgeEntry(direction: Direction, label: String) extends PathEntry

}
9 changes: 9 additions & 0 deletions core/src/main/scala/flatgraph/algorithm/package.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package flatgraph

package object algorithm {

/** Type class to look up the direct parents (e.g. the dependencies) of a value of type `A`. An implicit instance is required by
* `DependencySequencer.apply`.
*/
trait GetParents[A] {
/** @return the direct parents of the given `a`; an empty set if it has none */
def apply(a: A): Set[A]
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package flatgraph.algorithm

import org.scalatest.matchers.should.Matchers.*
import org.scalatest.wordspec.AnyWordSpec

/** Tests for [[DependencySequencer]]: each test defines a small graph via the parents of its nodes and verifies the resulting sequence of
* groups.
*/
class DependencySequencerTests extends AnyWordSpec {

"empty graph" in {
DependencySequencer(Set.empty[Node]) shouldBe Seq.empty
}

"one node" in {
val A = new Node("A")
DependencySequencer(Set(A)) shouldBe Seq(Set(A))
}

"two independent nodes" in {
val A = new Node("A")
val B = new Node("B")
DependencySequencer(Set(A, B)) shouldBe Seq(Set(A, B))
}

"two nodes in sequence" in {
val A = new Node("A")
val B = new Node("B", Set(A))
DependencySequencer(Set(A, B)) shouldBe Seq(Set(A), Set(B))
}

"sequence and parallelism - simple 1" in {
val A = new Node("A")
val B = new Node("B")
val C = new Node("C", Set(A, B))
DependencySequencer(Set(A, B, C)) shouldBe Seq(Set(A, B), Set(C))
}

"sequence and parallelism - simple 2" in {
val A = new Node("A")
val B = new Node("B", Set(A))
val C = new Node("C", Set(A))
DependencySequencer(Set(A, B, C)) shouldBe Seq(Set(A), Set(B, C))
}

"throw error if it's not a DAG" in {
val A = new Node("A")
val B = new Node("B", Set(A))
A.parents = Set(B) // cycle in dependencies, not a DAG any longer
assertThrows[AssertionError](DependencySequencer(Set(A, B)))
}

"larger graph 1" in {
// format: off
/** An arrow `X --> Y` means: X is a parent of Y
 *
 *               +-------------------+
 *               |                   v
 * +---+     +---+     +---+     +---+
 * | A | --> | B | --> | C | --> | E |
 * +---+     +---+     +---+     +---+
 *               |                   ^
 *               v                   |
 *             +---+                 |
 *             | D | ----------------+
 *             +---+
 */
// format: on
val A = new Node("A")
val B = new Node("B", Set(A))
val C = new Node("C", Set(B))
val D = new Node("D", Set(B))
val E = new Node("E", Set(B, C, D))
DependencySequencer(Set(A, B, C, D, E)) shouldBe Seq(Set(A), Set(B), Set(C, D), Set(E))
}

"larger graph 2" in {
// format: off
/** An arrow `X --> Y` means: X is a parent of Y
 *
 *               +-----------------------------+
 *               |                             v
 * +---+     +---+     +---+     +---+     +---+
 * | A | --> | B | --> | D | --> | E | --> | F |
 * +---+     +---+     +---+     +---+     +---+
 *               |                             ^
 *               v                             |
 *             +---+                           |
 *             | C | --------------------------+
 *             +---+
 */
// format: on
val A = new Node("A")
val B = new Node("B", Set(A))
val C = new Node("C", Set(B))
val D = new Node("D", Set(B))
val E = new Node("E", Set(D))
val F = new Node("F", Set(B, C, E))
DependencySequencer(Set(A, B, C, D, E, F)) shouldBe Seq(Set(A), Set(B), Set(C, D), Set(E), Set(F))
// note: for task processing this isn't actually the optimal solution,
// because E will only start after [C|D] are finished... it wouldn't need to wait for C though...
}

/** Minimal graph node for these tests. `parents` is mutable so that a cycle can be created after construction. Being a regular class,
* equality is by reference, i.e. two nodes with the same name are different nodes.
*/
class Node(val name: String, var parents: Set[Node] = Set.empty) {
override def toString = name
}
// the type class instance that DependencySequencer uses to look up the parents of a Node
implicit def getParents: GetParents[Node] = (node: Node) => node.parents
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package flatgraph.algorithm

import org.scalatest.matchers.should.Matchers.*
import org.scalatest.wordspec.AnyWordSpec

/** Tests for [[LowestCommonAncestors]]. All tests except "cyclic dependencies" share the graph that is defined below. */
class LowestCommonAncestorsTests extends AnyWordSpec {

/** The shared test graph - an arrow `X --> Y` means: X is a parent of Y
 *
 *   +--------------+
 *   |              |
 *   |  +---+     +---+     +---+     +---+     +---+     +---+
 *   |  | A | --> | C | --> | D | --> |   | --> | H | --> | I |
 *   |  +---+     +---+     +---+     |   |     +---+     +---+
 *   |    |         |                 |   |
 *   |    |         +---------------> | G |
 *   |    v                           |   |
 *   |  +---+                         |   |     +---+
 *   |  | B | ----------------------> |   | --> | F |
 *   |  +---+                         +---+     +---+
 *   |    |
 *   |    |
 *   |    v
 *   |  +---+
 *   +> | E |
 *      +---+
 *
 * created by `graph-easy --input=lca.eg`, where lca.eg:
 * [A] --> [B],[C]
 * [B] --> [E],[G]
 * [C] --> [D],[E],[G]
 * [D] --> [G]
 * [G] --> [F],[H]
 * [H] --> [I]
 */

val A = new Node("A", Set.empty)
val B = new Node("B", Set(A))
val C = new Node("C", Set(A))
val D = new Node("D", Set(C))
val E = new Node("E", Set(B, C))
val G = new Node("G", Set(B, C, D))
val F = new Node("F", Set(G))
val H = new Node("H", Set(G))
val I = new Node("I", Set(H))

"empty set" in {
val relevantNodes = Set.empty[Node]
LowestCommonAncestors(relevantNodes)(_.parents) shouldBe Set.empty
}

"one node" in {
// a single node is returned as is
val relevantNodes = Set(D)
LowestCommonAncestors(relevantNodes)(_.parents) shouldBe relevantNodes
}

"node E and H" in {
// common ancestors are A, B and C - A is a parent of B and C and therefore not a 'lowest' one
val relevantNodes = Set(E, H)
LowestCommonAncestors(relevantNodes)(_.parents) shouldBe Set(B, C)
}

"node B,E,H" in {
// only the ancestors of a node are considered, not the node itself: B's only ancestor is A
val relevantNodes = Set(B, E, H)
LowestCommonAncestors(relevantNodes)(_.parents) shouldBe Set(A)
}

"node A,B,E,H" in {
// A does not have any ancestors, hence there cannot be a common one
val relevantNodes = Set(A, B, E, H)
LowestCommonAncestors(relevantNodes)(_.parents) shouldBe Set.empty
}

"cyclic dependencies" in {
// local nodes that shadow the shared graph's A and B
val A = new Node("A", Set.empty)
val B = new Node("B", Set(A))
A.parents = Set(B) // cycle in dependencies, not a DAG any longer
LowestCommonAncestors(Set(A, B))(_.parents) shouldBe Set.empty
}

/** Minimal graph node for these tests. `parents` is mutable so that a cycle can be created after construction. */
class Node(val name: String, var parents: Set[Node]) {
override def toString = name
}
// NOTE(review): LowestCommonAncestors.apply takes the parents function explicitly (`_.parents` above), so this implicit instance looks
// unused within this class - confirm before removing
implicit def getParents: GetParents[Node] = (node: Node) => node.parents
}
Loading

0 comments on commit ce84d7f

Please sign in to comment.