Skip to content

Commit

Permalink
bring back neo4jcsv import/export, and also: (#182)
Browse files Browse the repository at this point in the history
* Schema.getNodePropertyNames(nodeLabel: String)
* Schema.getEdgePropertyName(edgeLabel: String)
* various refactorings
  • Loading branch information
mpollmeier authored Apr 29, 2024
1 parent be0df23 commit 5df6d1a
Show file tree
Hide file tree
Showing 48 changed files with 573 additions and 1,666 deletions.
5 changes: 4 additions & 1 deletion core/src/main/scala/flatgraph/DiffGraphApplier.scala
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,10 @@ private[flatgraph] class DiffGraphApplier(graph: Graph, diff: DiffGraphBuilder)
newPropertyView(insertionBaseIndex + insertionCounter) = insertion.property
} catch {
case _: ArrayStoreException =>
throw new UnsupportedOperationException(s"unsupported property type: ${insertion.property.getClass}")
val edgeType = graph.schema.getEdgeLabel(nodeKind, edgeKind)
throw new UnsupportedOperationException(
s"unsupported property type `${insertion.property.getClass}` for edge type `$edgeType`"
)
}
}
insertionCounter += 1
Expand Down
4 changes: 4 additions & 0 deletions core/src/main/scala/flatgraph/Edge.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ package flatgraph
/** A directed edge from `src` to `dst`.
  *
  * @param src
  *   the node this edge starts at
  * @param dst
  *   the node this edge points to
  * @param edgeKind
  *   numeric kind of this edge; translated to a label via the graph's schema
  * @param subSeq
  *   passed through unchanged from the constructor (NOTE(review): exact meaning is not visible in this class - confirm with callers)
  * @param property
  *   the raw edge property value; may be `null`
  */
class Edge(val src: GNode, val dst: GNode, val edgeKind: Short, val subSeq: Int, val property: Any) {

  /** The edge label, looked up in the schema of the source node's graph. */
  def label: String = {
    val schema = src.graph.schema
    schema.getEdgeLabel(src.nodeKind, edgeKind)
  }

  /** Name of this edge's property. Always empty here; override in subclass to provide a property name. */
  def propertyName: Option[String] = Option.empty[String]

  /** The raw `property` wrapped in an `Option`: `None` when it is `null`, otherwise `Some(property)`. */
  def propertyMaybe: Option[Any] =
    if (property == null) None else Some(property)
}

object Edge {
Expand Down
6 changes: 3 additions & 3 deletions core/src/main/scala/flatgraph/Graph.scala
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,13 @@ class Graph(val schema: Schema, val storagePathMaybe: Option[Path] = None) exten
def nodeCount(label: String): Int =
livingNodeCountByKind(schema.getNodeKindByLabel(label))

def nodeCount(): Int =
def nodeCount: Int =
livingNodeCountByKind.sum

def allEdges: Iterator[Edge] =
allNodes.flatMap(Accessors.getEdgesOut)

def edgeCount(): Int =
def edgeCount: Int =
allEdges.size

/** Lookup nodes with a given label and property value via index. N.b. currently only supported for String properties. Context:
Expand Down Expand Up @@ -161,7 +161,7 @@ class Graph(val schema: Schema, val storagePathMaybe: Option[Path] = None) exten
}

override def toString(): String =
s"Graph[${nodeCount()} nodes]"
s"Graph[$nodeCount nodes]"

def nodeCountByLabel: Map[String, Int] = {
schema.nodeKinds
Expand Down
17 changes: 11 additions & 6 deletions core/src/main/scala/flatgraph/Schema.scala
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,14 @@ abstract class Schema {
/** Optional variant of `getNodeKindByLabel`.
  *
  * @param label
  *   the node label to look up
  * @return
  *   `Some(kind)` for the kind returned by `getNodeKindByLabel`, or `None` when that lookup yields `UndefinedKind`
  */
def getNodeKindByLabelMaybe(label: String): Option[Int] = {
  val nodeKind = getNodeKindByLabel(label)
  if (nodeKind == UndefinedKind) None else Some(nodeKind)
}
def getNodePropertyNames(nodeLabel: String): Set[String]

// Why `getEdgeLabel` and `getPropertyLabel` take a node kind: we have a couple of pseudo-properties that can only exist at a
// single node kind (theoretically the same applies to edges). We want to allow our data layout to alias these properties, i.e.
// multiple properties share the same propertyKind / slot. Hence, to go back from kind -> label, we also need to know the node kind.
def getEdgeLabel(nodeKind: Int, edgeKind: Int): String
def getEdgeKindByLabel(label: String): Int
def getEdgePropertyName(label: String): Option[String]

def getPropertyLabel(nodeKind: Int, propertyKind: Int): String
def getPropertyKindByName(label: String): Int
Expand Down Expand Up @@ -149,15 +151,16 @@ abstract class Schema {

class FreeSchema(
nodeLabels: Array[String],
propertyLabels: Array[String],
propertyLabels: Array[String], // important: array order corresponds to `nodePropertyPrototypes` order!
nodePropertyPrototypes: Array[AnyRef],
propertyNamesByNodeLabel: Map[String, Set[String]],
edgeLabels: Array[String],
edgePropertyPrototypes: Array[AnyRef],
formalQuantities: Array[FormalQtyType.FormalQuantity] = null
) extends Schema {
val nodeMap = nodeLabels.zipWithIndex.toMap
val propMap = propertyLabels.zipWithIndex.toMap
val edgeMap = edgeLabels.zipWithIndex.toMap
private val nodeMap = nodeLabels.zipWithIndex.toMap
private val propMap = propertyLabels.zipWithIndex.toMap
private val edgeMap = edgeLabels.zipWithIndex.toMap

val edgePropertyTypes: Array[FormalQtyType.FormalType] = edgePropertyPrototypes.map(fromPrototype)
val nodePropertyTypes: Array[FormalQtyType.FormalType] = nodePropertyPrototypes.map(fromPrototype)
Expand All @@ -181,10 +184,12 @@ class FreeSchema(
override def getNodeKindByLabel(label: String): Int = nodeMap.getOrElse(label, Schema.UndefinedKind)
override def getEdgeLabel(nodeKind: Int, edgeKind: Int): String = edgeLabels(edgeKind)
override def getEdgeKindByLabel(label: String): Int = edgeMap.getOrElse(label, Schema.UndefinedKind)
override def getEdgePropertyName(label: String): Option[String] = None
override def getPropertyLabel(nodeKind: Int, propertyKind: Int): String = propertyLabels(propertyKind)
override def getPropertyKindByName(label: String): Int = propMap.getOrElse(label, Schema.UndefinedKind)
override def getNumberOfPropertyKinds: Int = propertyLabels.length
override def makeNode(graph: Graph, nodeKind: Short, seq: Int): GNode = new GNode(graph, nodeKind, seq)
override def getNodePropertyNames(nodeLabel: String): Set[String] = propertyNamesByNodeLabel.getOrElse(nodeLabel, default = Set.empty)
override def getNumberOfPropertyKinds: Int = propertyLabels.length
override def makeNode(graph: Graph, nodeKind: Short, seq: Int): GNode = new GNode(graph, nodeKind, seq)
override def makeEdge(src: GNode, dst: GNode, edgeKind: Short, subSeq: Int, property: Any): Edge =
new Edge(src, dst, edgeKind, subSeq, property)

Expand Down
11 changes: 8 additions & 3 deletions core/src/main/scala/flatgraph/storage/Deserialization.scala
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,14 @@ object Deserialization {
}

private def freeSchemaFromManifest(manifest: Manifest.GraphItem): FreeSchema = {
val nodeLabels = manifest.nodes.map { n => n.nodeLabel }
val nodePropNames = mutable.LinkedHashMap[String, AnyRef]()
val nodeLabels = manifest.nodes.map { n => n.nodeLabel }
val nodePropNames = mutable.LinkedHashMap.empty[String, AnyRef]
val propertyNamesByNodeLabel = mutable.LinkedHashMap.empty[String, Set[String]]
for (prop <- manifest.properties) {
propertyNamesByNodeLabel.updateWith(prop.nodeLabel) {
case None => Some(Set(prop.propertyLabel))
case Some(oldEntries) => Some(oldEntries + prop.propertyLabel)
}
nodePropNames(prop.propertyLabel) = protoFromOutline(prop.property)
}
val propertyLabels = nodePropNames.keysIterator.toArray
Expand All @@ -117,7 +122,7 @@ object Deserialization {
val edgeLabels = edgePropNames.keysIterator.toArray
val edgePropertyPrototypes = edgePropNames.valuesIterator.toArray

new FreeSchema(nodeLabels, propertyLabels, nodePropertyPrototypes, edgeLabels, edgePropertyPrototypes)
new FreeSchema(nodeLabels, propertyLabels, nodePropertyPrototypes, propertyNamesByNodeLabel.toMap, edgeLabels, edgePropertyPrototypes)
}

private def protoFromOutline(outline: OutlineStorage): AnyRef = {
Expand Down
12 changes: 12 additions & 0 deletions core/src/main/scala/flatgraph/traversal/Language.scala
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,18 @@ class NodeMethods(node: GNode) extends AnyVal {
}
}

/** Builds a fresh `java.util.HashMap` of this node's properties, keyed by property label.
  *
  * Iterates over every property kind of the graph's schema and adds an entry for each kind where
  * `Accessors.getNodePropertyOptionCompat` yields a value for this node; kinds without a value are left out.
  * The key is the label the schema reports for this node's kind and the given property kind.
  */
def propertiesMap: java.util.Map[String, AnyRef] = {
  val graphSchema = node.graph.schema
  val result      = new java.util.HashMap[String, AnyRef]()
  for {
    kind  <- graphSchema.propertyKinds
    value <- Accessors.getNodePropertyOptionCompat(node, kind)
  } result.put(graphSchema.getPropertyLabel(node.nodeKind, kind), value)
  result
}

private def edgeKind(edgeLabel: String): Int =
node.graph.schema.getEdgeKindByLabel(edgeLabel)
}
Expand Down
1 change: 1 addition & 0 deletions core/src/test/scala/flatgraph/GraphTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -738,6 +738,7 @@ class GraphTests extends AnyWordSpec with Matchers {
nodeLabels = Array(Node0Label),
edgeLabels = Array.empty,
propertyLabels = Array(propertySingle, propertyOptional, propertyMulti).map(_.name),
propertyNamesByNodeLabel = Map.empty,
edgePropertyPrototypes = new Array[AnyRef](0),
nodePropertyPrototypes = Array(new Array[String](0), new Array[String](0), new Array[String](0)),
formalQuantities = Array(FormalQtyType.QtyOne, null, FormalQtyType.QtyOption, null, FormalQtyType.QtyMulti)
Expand Down
1 change: 1 addition & 0 deletions core/src/test/scala/flatgraph/TestHelpers.scala
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ object TestSchema {
nodeLabels = Range(0, nodeKinds).map { id => s"V${id}" }.toArray,
edgeLabels = Range(0, edgeKinds).map { id => s"${id}" }.toArray,
propertyLabels = Range(0, properties).map { id => s"${id}" }.toArray,
propertyNamesByNodeLabel = Map.empty,
edgePropertyPrototypes = if (edgePropertyPrototypes != null) edgePropertyPrototypes else new Array[AnyRef](edgeKinds),
nodePropertyPrototypes = if (nodePropertyPrototypes != null) nodePropertyPrototypes else new Array[AnyRef](properties),
formalQtys
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ object SimpleDomain {
nodeLabels = Array(Thing.Label),
edgeLabels = edgeLabels,
propertyLabels = Array(ExampleGraphSetup.Properties.Name.name),
propertyNamesByNodeLabel = Map.empty,
edgePropertyPrototypes = new Array(edgeLabels.length),
nodePropertyPrototypes = Array(Array.empty[String])
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class DomainClassesGenerator(schema: Schema) {

val propertyKindByProperty = relevantProperties.zipWithIndex.toMap
val edgeTypes = schema.edgeTypes.sortBy(_.name).toArray
val edgeIdByType = edgeTypes.zipWithIndex.toMap
val edgeKindByType = edgeTypes.zipWithIndex.toMap

val newPropertiesByNodeType: Map[AbstractNodeType, Set[Property[?]]] =
schema.allNodeTypes.map { nodeType =>
Expand Down Expand Up @@ -82,8 +82,8 @@ class DomainClassesGenerator(schema: Schema) {
// format: off
val edgeAccess = edgeTypes.map { et =>
s"""
|final def _${camelCase(et.name)}Out: Iterator[StoredNode] = flatgraph.Accessors.getNeighborsOut(this.graph, this.nodeKind, this.seq, ${edgeIdByType(et)}).asInstanceOf[Iterator[StoredNode]]
|final def _${camelCase(et.name)}In: Iterator[StoredNode] = flatgraph.Accessors.getNeighborsIn(this.graph, this.nodeKind, this.seq, ${edgeIdByType(et)}).asInstanceOf[Iterator[StoredNode]]
|final def _${camelCase(et.name)}Out: Iterator[StoredNode] = flatgraph.Accessors.getNeighborsOut(this.graph, this.nodeKind, this.seq, ${edgeKindByType(et)}).asInstanceOf[Iterator[StoredNode]]
|final def _${camelCase(et.name)}In: Iterator[StoredNode] = flatgraph.Accessors.getNeighborsIn(this.graph, this.nodeKind, this.seq, ${edgeKindByType(et)}).asInstanceOf[Iterator[StoredNode]]
|""".stripMargin
}.mkString("\n")
// format: on
Expand Down Expand Up @@ -214,27 +214,39 @@ class DomainClassesGenerator(schema: Schema) {
if (edgeType.properties.length > 1) throw new RuntimeException("we only support zero or one edge properties")

// format: off
val propertyAccessor = if (edgeType.properties.length == 1) {
val p = edgeType.properties.head
p.cardinality match {
case _: Cardinality.One[?] =>
s"""{
| def ${camelCase(p.name)}: ${unpackTypeUnboxed(p.valueType, true )} = this.property.asInstanceOf[${unpackTypeUnboxed(p.valueType, true)}]
|}""".stripMargin
case Cardinality.ZeroOrOne =>
s"""{
| def ${camelCase(p.name)}: Option[${unpackTypeUnboxed(p.valueType, true)}] = Option(this.property.asInstanceOf[${unpackTypeBoxed(p.valueType, true)}])
|}""".stripMargin
case Cardinality.List => throw new RuntimeException("edge properties are only supported with cardinality one or optional")
}
} else ""
val propertyAccessorMaybe: Option[String] = edgeType.properties match {
case Nil => None
case Seq(p) =>
p.cardinality match {
case _: Cardinality.One[?] =>
Some(s"""def ${camelCase(p.name)}: ${unpackTypeUnboxed(p.valueType, true )} =
| this.property.asInstanceOf[${unpackTypeUnboxed(p.valueType, true)}]""".stripMargin)
case Cardinality.ZeroOrOne =>
Some(s"""def ${camelCase(p.name)}: Option[${unpackTypeUnboxed(p.valueType, true)}] =
| Option(this.property.asInstanceOf[${unpackTypeBoxed(p.valueType, true)}])""".stripMargin)
case Cardinality.List =>
throw new RuntimeException("edge properties are only supported with cardinality one or optional")
}
case multipleProperties =>
throw new RuntimeException(s"only one edge property supported - ${edgeType.name} defines ${multipleProperties.size}")
}

val propertyNameImplForObject = edgeType.properties.headOption.map { property=>
s"""val propertyName: Option[String] = Some("${property.name}")"""
}
val propertyNameImplForClass = propertyNameImplForObject.map { _ =>
s"override def propertyName: Option[String] = ${edgeType.className}.propertyName"
}

s"""object ${edgeType.className} {
| val Label = "${edgeType.name}"
| ${propertyNameImplForObject.getOrElse("")}
|}
|
|class ${edgeType.className}(src_4762: flatgraph.GNode, dst_4762: flatgraph.GNode, subSeq_4862: Int, property_4862: Any)
| extends flatgraph.Edge(src_4762, dst_4762, ${edgeKindByEdgeType(edgeType)}.toShort, subSeq_4862, property_4862) $propertyAccessor
| extends flatgraph.Edge(src_4762, dst_4762, ${edgeKindByEdgeType(edgeType)}.toShort, subSeq_4862, property_4862) {
| ${propertyNameImplForClass.getOrElse("")}
|}
|""".stripMargin
// format: on
}
Expand Down Expand Up @@ -605,30 +617,54 @@ class DomainClassesGenerator(schema: Schema) {
sourceLines.result()
}

val nodePropertyNameCases = for {
nodeType <- nodeTypes
propertyNames = nodeType.properties.map(p => s""""${p.name}"""").mkString(", ")
} yield s"""case "${nodeType.name}" => Set($propertyNames)"""

val edgePropertyNameCases = for {
edgeType <- edgeTypes
property <- edgeType.properties.headOption
} yield s"""case "${edgeType.name}" => Some("${property.name}")"""

// format: off
s"""package $basePackage
|
|import $basePackage.nodes
|import $basePackage.edges
|import flatgraph.FormalQtyType
|
|object GraphSchema extends flatgraph.Schema {
| private val nodeLabels = IndexedSeq($nodeLabelsSrc)
| val nodeKindByLabel = nodeLabels.zipWithIndex.toMap
| val edgeLabels = Array(${edgeTypes.map { e => s""""${e.name}"""" }.mkString(", ")})
| val edgeIdByLabel = edgeLabels.zipWithIndex.toMap
| val edgeLabels = Array(${edgeTypes.map { e => s""""${e.name}"""" }.mkString(", ")})
| val edgeKindByLabel = edgeLabels.zipWithIndex.toMap
| val edgePropertyAllocators: Array[Int => Array[?]] = Array($edgePropertyAllocatorsSrc)
| val nodeFactories: Array[(flatgraph.Graph, Int) => nodes.StoredNode] = Array($nodeFactoriesSrc)
| val edgeFactories: Array[(flatgraph.GNode, flatgraph.GNode, Int, Any) => flatgraph.Edge] = Array($edgeFactoriesSrc)
| val nodePropertyAllocators: Array[Int => Array[?]] = Array($nodePropertyAllocatorsSrc)
| val normalNodePropertyNames = Array(${relevantProperties.map { p => s""""${p.name}"""" }.mkString(", ")})
| val nodePropertyByLabel = normalNodePropertyNames.zipWithIndex.toMap$nodePropertyByLabelSrc
| val nodePropertyDescriptors: Array[FormalQtyType.FormalQuantity | FormalQtyType.FormalType] = ${nodePropertyDescriptorsSource
.mkString("\n")}
| val nodePropertyDescriptors: Array[FormalQtyType.FormalQuantity | FormalQtyType.FormalType] = ${nodePropertyDescriptorsSource.mkString("\n")}
| override def getNumberOfNodeKinds: Int = ${nodeTypes.length}
| override def getNumberOfEdgeKinds: Int = ${edgeTypes.length}
| override def getNodeLabel(nodeKind: Int): String = nodeLabels(nodeKind)
| override def getNodeKindByLabel(label: String): Int = nodeKindByLabel.getOrElse(label, flatgraph.Schema.UndefinedKind)
| override def getEdgeLabel(nodeKind: Int, edgeKind: Int): String = edgeLabels(edgeKind)
| override def getEdgeKindByLabel(label: String): Int = edgeIdByLabel.getOrElse(label, flatgraph.Schema.UndefinedKind)
| override def getEdgeKindByLabel(label: String): Int = edgeKindByLabel.getOrElse(label, flatgraph.Schema.UndefinedKind)
| override def getNodePropertyNames(nodeLabel: String): Set[String] = {
| nodeLabel match {
| ${nodePropertyNameCases.mkString("\n")}
| case _ => Set.empty
| }
| }
| override def getEdgePropertyName(label: String): Option[String] = {
| label match {
| ${edgePropertyNameCases.mkString("\n")}
| case _ => None
| }
| }
|
| override def getPropertyLabel(nodeKind: Int, propertyKind: Int): String = {
| if(propertyKind < ${relevantProperties.length}) normalNodePropertyNames(propertyKind)
| $containedNodesAsPropertyCases
Expand All @@ -643,6 +679,7 @@ class DomainClassesGenerator(schema: Schema) {
| override def getNodePropertyFormalType(nodeKind: Int, propertyKind: Int): FormalQtyType.FormalType = nodePropertyDescriptors(propertyOffsetArrayIndex(nodeKind, propertyKind)).asInstanceOf[FormalQtyType.FormalType]
| override def getNodePropertyFormalQuantity(nodeKind: Int, propertyKind: Int): FormalQtyType.FormalQuantity = nodePropertyDescriptors(1 + propertyOffsetArrayIndex(nodeKind, propertyKind)).asInstanceOf[FormalQtyType.FormalQuantity]
|}""".stripMargin
// format: on
}
os.write(outputDir0 / "GraphSchema.scala", schemaFile)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
LOAD CSV FROM 'file:/edges_connected_to_data.csv' AS line
MATCH (a), (b)
WHERE a.id = toInteger(line[0]) AND b.id = toInteger(line[1])
CREATE (a)-[r:connected_to {string_mandatory: line[3]}]->(b);
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0,1,connected_to,edge property
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
:START_ID,:END_ID,:TYPE,string_mandatory:string
10 changes: 10 additions & 0 deletions formats-tests/src/test/resources/neo4jcsv/nodes_node_a_cypher.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
LOAD CSV FROM 'file:/nodes_node_a_data.csv' AS line
CREATE (:node_a {
id: toInteger(line[0]),
int_list: toIntegerList(split(line[2], ";")),
int_mandatory: toInteger(line[3]),
int_optional: toInteger(line[4]),
string_list: toStringList(split(line[5], ";")),
string_mandatory: line[6],
string_optional: line[7]
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
0,node_a,,42,,node 3 c1;node 3 c2,node 2 a,node 2 b
1,node_a,10;11;12,1,2,,<empty>,
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
:ID,:LABEL,int_list:int[],int_mandatory:int,int_optional:int,string_list:string[],string_mandatory:string,string_optional:string
2 changes: 0 additions & 2 deletions formats-tests/src/test/resources/neo4jcsv/testedges_data.csv

This file was deleted.

This file was deleted.

3 changes: 0 additions & 3 deletions formats-tests/src/test/resources/neo4jcsv/testnodes_data.csv

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ object TestDomainSimple {
PropertyNames.ContainedTestNodeProperty
),
nodePropertyPrototypes = Array(Array.empty[String], Array.empty[String], Array.emptyIntArray, Array.emptyIntArray, Array.empty[GNode]),
propertyNamesByNodeLabel = Map.empty,
edgePropertyPrototypes = Array(Array.emptyLongArray)
)

Expand Down
Loading

0 comments on commit 5df6d1a

Please sign in to comment.