-
Notifications
You must be signed in to change notification settings - Fork 0
/
graphdb.go
319 lines (284 loc) · 8.36 KB
/
graphdb.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
package main
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"time"
"github.com/neo4j/neo4j-go-driver/v4/neo4j"
"github.com/spf13/viper"
)
// check verifies that the configured Neo4j instance is reachable.
//
// It obtains a driver from getDriver, runs verify against it and prints
// "Neo4j is running" on success. If verification fails, the error is printed
// and the function panics with it. The driver is closed on the way out.
func check() {
	db := getDriver()
	defer db.Close()

	if connErr := verify(db); connErr != nil {
		// Report the failure before aborting.
		fmt.Println("Error:", connErr)
		panic(connErr)
	}

	// Connectivity confirmed.
	fmt.Println("Neo4j is running")
}
// getDriver builds a Neo4j driver from the viper settings "neo4j.uri",
// "neo4j.user" and "neo4j.password" (basic auth with an empty realm).
//
// The URI and user are echoed to stdout to help with troubleshooting. The
// password itself is never printed: only whether one is configured is
// reported, so that credentials do not leak into terminals or captured logs.
//
// It panics if the driver cannot be constructed. Callers are expected to
// Close the returned driver when they are done with it.
func getDriver() neo4j.Driver {
	uri := viper.GetString("neo4j.uri")
	user := viper.GetString("neo4j.user")
	password := viper.GetString("neo4j.password")

	// Describe the password without revealing it.
	passwordStatus := "(not set)"
	if password != "" {
		passwordStatus = "(set, hidden)"
	}

	fmt.Println("neo4j config:")
	fmt.Println("neo4j.uri:", uri)
	fmt.Println("neo4j.user:", user)
	fmt.Println("neo4j.password:", passwordStatus)

	driver, err := neo4j.NewDriver(uri, neo4j.BasicAuth(user, password, ""))
	if err != nil {
		panic(err)
	}
	return driver
}
// verify checks that the server behind driver can be reached. It returns the
// result of the driver's VerifyConnectivity call unchanged (nil on success).
func verify(driver neo4j.Driver) error {
	return driver.VerifyConnectivity()
}
// Export to Neo4j
func export(sourceDir string) {
// Check if sourceDir directory exists
if _, err := os.Stat(sourceDir); os.IsNotExist(err) {
fmt.Println("Error:", err)
panic(err)
}
// Get list of markdown files in sourceDir
files, err := getFiles(sourceDir)
if err != nil {
fmt.Println("Error:", err)
panic(err)
}
nodes, relationships := getFilesAsGraph(files)
// Create a neo4j driver and defer closing it
driver := getDriver()
defer driver.Close()
// Open a new session and defer closing it
session, err := driver.Session(neo4j.AccessModeWrite)
if err != nil {
fmt.Println("Error:", err)
panic(err)
}
defer session.Close()
// Build graph in Neo4j using cypher queries
// Create nodes
for _, node := range nodes {
// Create node
cypher := fmt.Sprintf("CREATE (n:Page {title:'%s'})", node)
fmt.Println(cypher)
_, err := session.WriteTransaction(func(tx neo4j.Transaction) (interface{}, error) {
result, err := tx.Run(cypher, nil)
if err != nil {
return nil, err
}
return result, nil
})
if err != nil {
fmt.Println("Error:", err)
panic(err)
}
}
// Create relationships
for _, relationship := range relationships {
// Create relationship
cypher := fmt.Sprintf("MATCH (n:Page {title:'%s'}), (m:Page {title:'%s'}) CREATE (n)-[:LINKS_TO]->(m)", relationship[0], relationship[1])
fmt.Println(cypher)
_, err := session.WriteTransaction(func(tx neo4j.Transaction) (interface{}, error) {
result, err := tx.Run(cypher, nil)
if err != nil {
return nil, err
}
return result, nil
})
if err != nil {
fmt.Println("Error:", err)
panic(err)
}
}
// Print success message
fmt.Println("Neo4j import successful")
// Print number of nodes created
fmt.Println("Nodes created:", len(nodes))
// Print number of relationships created
fmt.Println("Relationships created:", len(relationships))
// Print number of files processed
fmt.Println("Files processed:", len(files))
}
// getFiles returns the paths of the markdown files (*.md) located directly in
// sourceDir. Subdirectories are not searched.
//
// The glob pattern is assembled with filepath.Join so that a trailing
// separator on sourceDir is harmless and an empty sourceDir refers to the
// current directory rather than the filesystem root.
//
// NOTE(review): sourceDir is still interpreted as part of the glob pattern,
// so glob metacharacters in the directory name (e.g. '[' or '*') are not
// matched literally.
//
// The only error returned is the one reported by filepath.Glob for a
// malformed pattern; in that case the returned slice is nil.
func getFiles(sourceDir string) ([]string, error) {
	pattern := filepath.Join(sourceDir, "*.md")

	files, err := filepath.Glob(pattern)
	if err != nil {
		return nil, err
	}
	return files, nil
}
// getMarkdownTitleCamelCase derives a page title from a markdown file path:
// the directory part is dropped and a trailing ".md" extension is removed.
//
// For example "notes/MyPage.md" yields "MyPage". Names that do not end in
// ".md" are returned unchanged (minus the directory), so short or
// extension-less names neither panic nor lose their last characters.
func getMarkdownTitleCamelCase(file string) string {
	const markdownExt = ".md"

	// Keep only the final path element.
	filename := filepath.Base(file)

	// Strip the extension only when it really is the markdown extension.
	if filepath.Ext(filename) != markdownExt {
		return filename
	}
	return filename[:len(filename)-len(markdownExt)]
}
// getRelationships turns the links found in file into [source, destination]
// pairs. The source of every pair is the title of file as computed by
// getMarkdownTitleCamelCase; the destination is the link target reported by
// getLinks. The result is an empty, non-nil slice when the file has no links.
func getRelationships(file string) [][2]string {
	destinations := getLinks(file)

	pairs := make([][2]string, len(destinations))
	for i := range destinations {
		pairs[i] = [2]string{getMarkdownTitleCamelCase(file), destinations[i]}
	}
	return pairs
}
// markdownLinkPattern matches inline markdown links of the form
// [text](destination). Capture group 1 is the link text and capture group 2
// is the destination without the surrounding parentheses. The U flag makes
// the quantifiers non-greedy, so the destination ends at the first ')'.
var markdownLinkPattern = regexp.MustCompile(`(?mU)\[([^\[]+)\]\((.*)\)`)

// getLinks reads the markdown file at path file and returns the destination
// of every inline link ([text](destination)) in order of appearance.
//
// The destination is taken from the pattern's capture group, so parentheses
// inside the link text (e.g. "[see (this)](Target)") do not get mistaken for
// the destination. The result is nil when the file contains no links.
//
// It panics if the file cannot be read.
func getLinks(file string) []string {
	content, err := ioutil.ReadFile(file)
	if err != nil {
		panic(err)
	}

	var links []string
	for _, match := range markdownLinkPattern.FindAllStringSubmatch(string(content), -1) {
		// match[0] is the whole link, match[1] the text, match[2] the destination.
		links = append(links, match[2])
	}
	return links
}
// clearDatabase deletes every node, together with its relationships, from the
// configured Neo4j database by running "MATCH (n) DETACH DELETE n" in a write
// transaction.
//
// The number of deleted nodes and relationships is read from the update
// counters of the result summary (the statement has no RETURN clause, so it
// yields no records) and printed after the transaction has finished. Both the
// session and the driver are closed before the function returns.
//
// Any session or query error is printed and then raised as a panic.
func clearDatabase() {
	driver := getDriver()
	defer driver.Close()

	session, err := driver.Session(neo4j.AccessModeWrite)
	if err != nil {
		fmt.Println("Error:", err)
		panic(err)
	}
	defer session.Close()

	cypher := "MATCH (n) DETACH DELETE n"

	// The transaction function only runs the statement and hands back its
	// summary; printing happens afterwards so that a retried transaction
	// function cannot produce duplicate output.
	outcome, err := session.WriteTransaction(func(tx neo4j.Transaction) (interface{}, error) {
		result, err := tx.Run(cypher, nil)
		if err != nil {
			return nil, err
		}
		summary, err := result.Consume()
		if err != nil {
			return nil, err
		}
		return summary, nil
	})
	if err != nil {
		fmt.Println("Error:", err)
		panic(err)
	}

	if summary, ok := outcome.(neo4j.ResultSummary); ok && summary != nil {
		counters := summary.Counters()
		fmt.Println("Nodes deleted:", counters.NodesDeleted())
		fmt.Println("Relationships deleted:", counters.RelationshipsDeleted())
	}

	fmt.Println("Neo4j clear successful")
}
// getFilesAsGraph converts a list of markdown file paths into graph data.
//
// It returns:
//   - nodes: one title per file (see getMarkdownTitleCamelCase), in input order;
//   - relationships: [source, destination] title pairs for the links between
//     those files, in order of first appearance.
//
// Links are filtered in a single pass:
//   - links whose destination is not one of the nodes are dropped and their
//     destinations are written to a log file under ./logs;
//   - links from a page to itself are dropped;
//   - repeated links are kept only once.
//
// The relationships slice is nil when no link survives the filtering. The
// function panics if the log file cannot be written.
func getFilesAsGraph(files []string) ([]string, [][2]string) {
	nodes := make([]string, 0, len(files))
	// Set view of nodes for constant-time destination lookups.
	knownNodes := make(map[string]struct{}, len(files))
	var relationships [][2]string

	for _, file := range files {
		title := getMarkdownTitleCamelCase(file)
		nodes = append(nodes, title)
		knownNodes[title] = struct{}{}
		relationships = append(relationships, getRelationships(file)...)
	}

	var discardedDestinations []string
	var uniqueInternalLinkRelationships [][2]string
	seen := make(map[[2]string]struct{}, len(relationships))

	for _, relationship := range relationships {
		// Destination is not a known page: remember it for the log.
		if _, known := knownNodes[relationship[1]]; !known {
			discardedDestinations = append(discardedDestinations, relationship[1])
			continue
		}
		// Skip links from a page to itself.
		if relationship[0] == relationship[1] {
			continue
		}
		// Skip links that were already recorded.
		if _, duplicate := seen[relationship]; duplicate {
			continue
		}
		seen[relationship] = struct{}{}
		uniqueInternalLinkRelationships = append(uniqueInternalLinkRelationships, relationship)
	}

	logFile := logDiscardedDestinations(discardedDestinations)
	fmt.Println("Discarded internal link relationships logged to:", logFile)

	return nodes, uniqueInternalLinkRelationships
}

// logDiscardedDestinations writes the given link destinations, one per line
// below a "Discarded relationships:" header, to a new file in ./logs named
// after the current time (RFC 3339), creating the folder if necessary. It
// returns the path of the file and panics if the folder or file cannot be
// written.
//
// NOTE(review): RFC 3339 timestamps contain ':' characters, which are not
// valid in file names on every platform — confirm the target platforms.
func logDiscardedDestinations(destinations []string) string {
	const logFolder = "./logs"

	if err := os.MkdirAll(logFolder, 0755); err != nil {
		panic(err)
	}

	logFile := filepath.Join(logFolder, time.Now().Format(time.RFC3339))

	content := []byte("Discarded relationships:")
	for _, destination := range destinations {
		content = append(content, '\n')
		content = append(content, destination...)
	}

	// 0666 (before umask) matches the mode os.Create would use.
	if err := ioutil.WriteFile(logFile, content, 0666); err != nil {
		panic(err)
	}
	return logFile
}
// contains reports whether the string e occurs in the slice s.
// It returns false for a nil or empty slice.
func contains(s []string, e string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == e {
			return true
		}
	}
	return false
}
// containsRelationship reports whether the [source, destination] pair e is
// already present in s. Two pairs are equal when both their source and their
// destination match; direction matters.
func containsRelationship(s [][2]string, e [2]string) bool {
	for _, candidate := range s {
		// Arrays compare element-wise, so this checks both ends at once.
		if candidate == e {
			return true
		}
	}
	return false
}