-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNeighbourClassifier.py
executable file
·86 lines (59 loc) · 2.48 KB
/
NeighbourClassifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from Constants import *
################################################################################
# Classifies the data based on nearest neighbours
def neighbourClassifier(x, edgeDict):
    """Predict, for every (source, sink) row in x, whether source follows sink.

    For each row the nearest neighbours of the source are fetched with
    getNeighbours (k = 10) and the fraction of them that list the sink in
    edgeDict is computed. That fraction is then collapsed to one of two
    confident scores.

    Args:
        x: Indexable collection of rows; row[0] is the source node and
            row[1] is the sink node.
        edgeDict: Mapping from a node to the collection of sinks it points to.

    Returns:
        A list with one score per row of x: 0.5 when getNeighbours returned
        nothing, 0.01 when at most 20% of the neighbours point at the sink,
        and 0.99 otherwise.
    """
    def scoreRow(row):
        source, sink = row[0], row[1]
        nearest = getNeighbours(source, edgeDict, k = 10)

        # Guess if there are no neighbours
        if not nearest:
            return 0.5

        # Count the neighbours that follow this sink; a neighbour missing
        # from edgeDict is treated as following nothing
        followers = 0
        for entry in nearest:
            if sink in edgeDict.get(entry[0], []):
                followers += 1
        fraction = followers / len(nearest)

        # Min-max values: push the fraction to a confident low or high score
        return 0.01 if fraction <= 0.2 else 0.99

    # Index-based access is kept so that any container supporting len() and
    # integer indexing behaves exactly as before
    return [scoreRow(x[idx]) for idx in range(len(x))]
################################################################################
# Returns the k nodes closest to the source node
def getNeighbours(source, edgeDict, k = 10, verbose = False):
    """Return up to k nodes whose sink sets overlap most with source's.

    Similarity between two nodes is the Jaccard index of their sink sets,
    i.e. the number of shared sinks divided by the number of distinct sinks
    across both nodes.

    Args:
        source: Key of edgeDict whose neighbours are wanted.
        edgeDict: Mapping from a node to an iterable of the sinks it points to.
        k: Maximum number of neighbours to return.
        verbose: When true, print a progress line every 1000 keys examined.

    Returns:
        A list of (node, similarity) tuples ordered from most to least
        similar, holding at most k entries. Only nodes that share at least
        one sink with source are included, so the list may be shorter than k
        (or empty). Ties keep the iteration order of edgeDict. An empty list
        is also returned when the scan exceeds TIME_LIMIT as measured by
        timer().

    Raises:
        KeyError: If source is not a key of edgeDict.
    """
    sourceSinks = set(edgeDict[source])
    totalKeys = len(edgeDict)
    candidates = []
    done = 0
    start = timer()

    for key in edgeDict:
        # Can't be its own neighbour
        if key == source:
            continue

        neighbourSinks = set(edgeDict[key])
        shared = len(sourceSinks & neighbourSinks)
        # Nodes with nothing in common are never neighbours. Skipping them
        # also avoids dividing by zero when both sink sets are empty.
        if shared:
            # |A v B| = |A| + |B| - |A ^ B|
            union = len(sourceSinks) + len(neighbourSinks) - shared
            candidates.append((key, shared / union))

        done += 1
        if verbose and done % 1000 == 0:
            print("Iterated through {} of {} keys"
                  .format(done, totalKeys))

        # Time limit so none take too long
        if timer() - start > TIME_LIMIT:
            return []

    # list.sort is stable even with reverse = True, so equally similar nodes
    # stay in the order they were encountered
    candidates.sort(key = lambda entry: entry[1], reverse = True)
    return candidates[:max(k, 0)]
################################################################################