-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathb_tree.py
492 lines (451 loc) · 21 KB
/
b_tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
"""
This is a B-Tree that supports addition and deletion from the tree
Mostly inefficient, this is a proof-of-concept and learning material, rather than something you'd use in production.
The tests are heavily commented and illustrated and are a good source of edge cases,
so if you're working on building your own B-Tree, you'll do yourself a favor in copying some tests
TODO: Heavy refactor, a lot of code is repeated and some is not clear enough
"""
class BNode:
def __init__(self, parent=None, order=6):
self.parent = parent
self.order = order
self.max_values_count = order-1
self.values = []
self.children = [None for _ in range(order)]
def add(self, value):
if not self.__has_children():
# Leaf: Add here
self.values.append(value)
self.values = sorted(self.values)
if len(self.values) == self.max_values_count+1:
self.split()
else:
# Find downwards node :)
# try max left and max right first
if value < self.values[0]:
# go left
wanted_child = self.children[0]
wanted_child.add(value)
elif value > self.values[-1]:
# go right
wanted_child = self.children[-1]
wanted_child.add(value)
else:
# find middle
for i in range(1, len(self.values)):
if value > self.values[i-1] and value < self.values[i]:
wanted_child = self.children[i]
wanted_child.add(value)
break
def try_transfer(self):
if len(self.values) == 0:
old_ch = [ch for ch in self.children]
for ch in old_ch:
if ch is not None:
self.merge_with_child(ch)
# self.children = []
new_parent = BNode(order=self.order, parent=self.parent)
new_parent.children = []
if self.parent is None:
return
# swap parent with right or left sibling
right_sbl_idx = self.parent.children.index(self) + 1
left_sbl_idx = self.parent.children.index(self) - 1
# decide on a sibling
rght_sibling = None
left_sibling = None
if right_sbl_idx < len(self.parent.children):
rght_sibling = self.parent.children[right_sbl_idx]
if left_sbl_idx >= 0:
left_sibling = self.parent.children[left_sbl_idx]
chosen_sibling = None # the sibling we'll transfer with
sibling_idx = None
if rght_sibling is not None and left_sibling is not None:
# decide from both, taking one with more values
if len(rght_sibling.values) > len(left_sibling.values):
# take right
chosen_sibling = rght_sibling
else:
chosen_sibling = left_sibling
elif rght_sibling is not None:
# take right
chosen_sibling = rght_sibling
elif left_sibling is not None:
# take left
chosen_sibling = left_sibling
if len(chosen_sibling.values) == 1:
# take from the parent and try to transfer him
self.values.append(self.parent.values[0])
self.parent.values.remove(self.parent.values[0])
self.parent.try_transfer()
return
sibling_idx = 0 if chosen_sibling == rght_sibling else -1
parent_self_idx = self.parent.children.index(
self) # the index of the current node in the parent's children arr
self.parent.children[parent_self_idx] = new_parent
if len(chosen_sibling.values) == 1:
raise Exception("TANK")
if len(self.parent.values) != 1:
raise Exception("TANK")
new_parent.values.append(self.parent.values[sibling_idx])
# get first rght sibling child and add it here (old parent successor)
if sibling_idx == 0:
new_parent.children.append(self)
new_parent.children.append(chosen_sibling.children[sibling_idx])
else:
new_parent.children.append(chosen_sibling.children[sibling_idx])
new_parent.children.append(self)
self.parent.values[sibling_idx] = chosen_sibling.values[sibling_idx]
chosen_sibling.children[sibling_idx].parent = new_parent
chosen_sibling.values.pop(sibling_idx)
chosen_sibling.children.pop(sibling_idx)
self.parent = new_parent
def remove(self, value):
# find the appropriate root to remove
if value not in self.values:
if not self.__has_children():
raise Exception('Value not in tree!')
if value < self.values[0]: # before first
self.children[0].remove(value)
elif value > self.values[-1]: # after last
self.children[-1].remove(value)
else:
for i in range(len(self.values)-1):
if self.values[i] < value < self.values[i+1]:
self.children[i+1].remove(value)
break
return
if not self.__has_children(): # easy, simple remove
self.values.remove(value)
if len(self.values) == 0: # try to do a transfer
self.parent.remove_merge(value, self)
return
# try to find predecessor and successor
predecessor: BNode = self.get_predecessor(value)
value_idx = self.values.index(value)
# predecessor: BNode = self.children[value_idx]
# if predecessor.__has_children():
# predecessor = predecessor.children[-1]
successor: BNode = self.children[value_idx + 1]
if successor is not None and successor.__has_children():
successor = successor.children[0]
if successor is not None and predecessor is not None and len(successor.values) == 1 and len(predecessor.values) == 1 and predecessor in self.children and successor in self.children:
# both have 1 child, so, merge VALUE and SUCCESSOR into PREDECESSOR
for l in successor.values:
predecessor.add(l)
# TODO: It splits itself with predecessor.add an predecessor.remove
# predecessor.add(value)
# TODO: Children lost
self.values.remove(value)
# re-order children
if len(self.children) == 2:
self.children = [self.children[0]]
for i in range(value_idx + 1, len(self.children) - 1):
self.children[i] = self.children[i + 1]
# recursively remove
# predecessor.remove(value)
if len(self.values) == 0:
for ch in self.children:
if ch is not None:
self.merge_with_child(ch)
# self.children = []
new_parent = BNode(order=self.order, parent=self.parent)
new_parent.children = []
# swap parent with right or left sibling
right_sbl_idx = self.parent.children.index(self)+1
left_sbl_idx = self.parent.children.index(self)-1
# decide on a sibling
rght_sibling = None
left_sibling = None
if right_sbl_idx < len(self.parent.children):
rght_sibling = self.parent.children[right_sbl_idx]
if left_sbl_idx >= 0:
left_sibling = self.parent.children[left_sbl_idx]
chosen_sibling = None # the sibling we'll transfer with
sibling_idx = None
if rght_sibling is not None and left_sibling is not None:
# decide from both, taking one with more values
if len(rght_sibling.values) > len(left_sibling.values):
# take right
chosen_sibling = rght_sibling
else:
chosen_sibling = left_sibling
elif rght_sibling is not None:
# take right
chosen_sibling = rght_sibling
elif left_sibling is not None:
# take left
chosen_sibling = left_sibling
sibling_idx = 0 if chosen_sibling == rght_sibling else -1
parent_self_idx = self.parent.children.index(self) # the index of the current node in the parent's children arr
self.parent.children[parent_self_idx] = new_parent
if len(chosen_sibling.values) == 1:
raise Exception("TANK")
if len(self.parent.values) != 1:
raise Exception("TANK")
new_parent.values.append(self.parent.values[sibling_idx])
# get first rght sibling child and add it here (old parent successor)
if sibling_idx == 0:
new_parent.children.append(self)
new_parent.children.append(chosen_sibling.children[sibling_idx])
else:
new_parent.children.append(chosen_sibling.children[sibling_idx])
new_parent.children.append(self)
self.parent.values[sibling_idx] = chosen_sibling.values[sibling_idx]
chosen_sibling.children[sibling_idx].parent = new_parent
chosen_sibling.values.pop(sibling_idx)
chosen_sibling.children.pop(sibling_idx)
self.parent = new_parent
elif successor is None or len(predecessor.values) >= len(successor.values):
pass
# switch with predecessor value
print(f'EXCHANGING NODE {self.values[value_idx]} WITH PREDECESSOR {predecessor.values[-1]}')
self.values[value_idx], predecessor.values[-1] = predecessor.values[-1], self.values[value_idx]
# recursively delete downwards
print(f'DELETING {value} FROM PREDECESSOR')
return predecessor.remove(value)
else:
# switch with successor value
self.values[value_idx], successor.values[0] = successor.values[0], self.values[value_idx]
return successor.remove(value)
def split(self):
if len(self.values) % 2 == 0:
median = (len(self.values) // 2) - 1
else:
median = (len(self.values) // 2)
median_value = self.values[median]
left_arr = self.values[:median]
right_arr = self.values[median+1:]
if self.parent is None:
# easy shit, create two children
left_node = BNode(order=self.order, parent=self)
# set parents
left_node.values = left_arr
right_node = BNode(order=self.order, parent=self)
right_node.values = right_arr
# copy children
left_node.children = self.children[:median+1]
right_node.children = self.children[median+1:]
# assign parents to children
for i in left_node.children:
if i is not None:
i.parent = left_node
for i in right_node.children:
if i is not None:
i.parent = right_node
self.values = [median_value]
self.children = [left_node, right_node]
else:
left_node = BNode(order=self.order, parent=self)
left_node.values = left_arr
right_node = BNode(order=self.order, parent=self)
right_node.values = right_arr
# copy children
left_node.children = self.children[:median + 1]
right_node.children = self.children[median + 1:]
# assign parents to children
for i in left_node.children:
if i is not None:
i.parent = left_node
for i in right_node.children:
if i is not None:
i.parent = right_node
self.values = [median_value]
self.children = [left_node, right_node]
# Merge nodes
self.parent.merge_with_child(self)
def merge_with_child(self, other: 'BNode'):
# add the values
to_split = False
if other is None:
raise Exception('Error here!')
if len(other.values) + len(self.values) >= self.order:
to_split = True # we will go past the capacity for this node and will need to split it
if other not in self.children:
raise Exception('A BNode can only merge with its children!')
other_idx = self.children.index(other)
# from other_idx-1 onwards, insert all the values
insert_idx = other_idx
for other_val in other.values:
self.values.insert(insert_idx, other_val)
insert_idx += 1
if self.values != sorted(self.values):
raise Exception('Sort err while adding values with merge!')
# copy children
# copy first child
if other.__has_children():
self.children[other_idx] = other.children[0]
self.children[other_idx].parent = self
insert_idx = other_idx + 1
for child in other.children[1:]:
self.children.insert(insert_idx, child)
child.parent = self
insert_idx += 1
if to_split:
self.split()
def __has_children(self):
return any(self.children)
def remove_merge(self, value, child_node: 'BNode'):
"""
Transfer
This is called whenever we remove a children node and it has less than T keys
i.e removing 40 from here, we'll call remove_merge on C.
C.remove_merge(40)
we want to find a proper sibling to replace self (C) with
50(C)
/ \
(F)40 60|65 (G)
"""
# vl_idx = child_node.values.index(value)
# try to find left sibling
start_val = self.values[0]
end_val = self.values[-1]
# find left and right idx
move_value = None
right_idx = None
left_idx = None
if value < start_val:
move_value = start_val
mv_val_idx = 0
left_idx, right_idx = None, 1
elif value > end_val:
move_value = end_val
mv_val_idx = len(self.values)-1
left_idx, right_idx = len(self.children)-2, None
else:
for i in range(0, len(self.values)-1):
if self.values[i] < value < self.values[i+1]:
# raise Exception('Not sure what to do here and what the value is')
# Move value here depends on the transfer we're gonna do
# if left - i
# if right - i+1
move_value = self.values[i]
mv_val_idx = i
# element is at I-1, left is at i-1, righ at i+1
left_idx = i
right_idx = i+2
if right_idx is not None and self.children[right_idx] is not None and len(self.children[right_idx].values) > 1:
move_value = self.values[i+1]
mv_val_idx = i+1
elif left_idx is not None and self.children[left_idx] is not None and len(self.children[left_idx].values) > 1:
move_value = self.values[i]
mv_val_idx = i
break
can_take_right = right_idx is not None and len(self.children[right_idx].values) > 1
can_take_left = left_idx is not None and len(self.children[left_idx].values) > 1
if can_take_right and can_take_left and len(self.children) > 2:
# take one with more values
if len(self.children[left_idx].values) > len(self.children[right_idx].values):
predecessor = self.children[left_idx]
if len(self.children[left_idx].values) == 1:
# TODO: Merge
predecessor.add(move_value)
self.children.remove(child_node)
return
pass
self.values[mv_val_idx], predecessor.values[-1] = predecessor.values[-1], self.values[mv_val_idx]
child_node.add(move_value)
predecessor.remove(move_value)
else:
# take right
successor = self.children[right_idx]
if len(self.children[right_idx].values) == 1:
# TODO: Merge
successor.add(move_value)
self.children.remove(child_node)
return
self.values[mv_val_idx], successor.values[0] = successor.values[0], self.values[mv_val_idx]
child_node.add(move_value)
# child_node.values[vl_idx] = mv_val_idx
successor.remove(move_value)
elif can_take_right or (left_idx is None and right_idx is not None and self.children[right_idx] is not None and len(self.children[right_idx].values) >= 1 and len(self.children) > 2):
successor = self.children[right_idx]
if len(self.children[right_idx].values) == 1:
# TODO: Merge
self.values.remove(move_value)
successor.add(move_value)
self.children.remove(child_node)
return
# take from successor
self.values[mv_val_idx], successor.values[0] = successor.values[0], self.values[mv_val_idx]
child_node.add(move_value)
# child_node.values[vl_idx] = mv_val_idx
successor.remove(move_value)
elif can_take_left or (right_idx is None and left_idx is not None and self.children[left_idx] is not None and (len(self.children[left_idx].values) >= 1 and len(self.children) > 2)):
predecessor = self.children[left_idx]
if len(self.children[left_idx].values) == 1:
# TODO: Merge
self.values.remove(move_value)
predecessor.add(move_value)
self.children.remove(child_node)
return
self.values[mv_val_idx], predecessor.values[-1] = predecessor.values[-1], self.values[mv_val_idx]
child_node.add(move_value)
predecessor.remove(move_value)
else:
# merge
# TANK
# TODO: Tank more
# merge oneself with children, we should now be empty
self.merge_with_children()
# take the parent and try to merge him
self.values.append(self.parent.values[0])
self.parent.values.remove(self.parent.values[0])
self.parent.try_transfer()
# self.parent.remove_merge(334, self)
# self.merge_recursively()
self.children = [None for _ in self.children]
def merge_with_children(self):
p = BNode(order=self.order)
p.values = [val for val in self.values]
p.children = [child for child in self.children]
self.values = []
self.children = [p]
for pc in p.children:
p.merge_with_child(pc)
return p
pass
def merge_recursively(self, excluding=None):
"""
Merge this current root with all its children
"""
from copy import deepcopy
old_childre = list(self.children)
for other_idx, ch in enumerate(old_childre):
if ch is not None:
if ch == excluding:
continue
self.values += ch.values
self.values = sorted(self.values)
# copy children
# copy first child
if ch.__has_children():
self.children[other_idx] = ch.children[0]
self.children[other_idx].parent = self
insert_idx = other_idx + 1
for child in ch.children[1:]:
self.children.insert(insert_idx, child)
child.parent = self
insert_idx += 1
# self.merge_with_child(ch)
if self.parent is not None:
self.parent.merge_recursively(excluding=self)
def get_predecessor(self, value) -> 'BNode':
"""
Return the predecessor of our given value
e.g
100 | 200 | 300 (A)
/ |
150
\
165
A.get_predecessor(200) should return 165
"""
if value not in self.values:
raise Exception("Can't get the predecessor of a value that does not exist!")
value_idx = self.values.index(value)
predecessor: BNode = self.children[value_idx] # get the smaller node
# go right as much as possible
while predecessor is not None and len(predecessor.children) > 0 and predecessor.children[-1] is not None:
predecessor = predecessor.children[-1]
return predecessor