-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
1366 lines (1089 loc) · 49.5 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import random
import util
import kivy
kivy.require("1.9.1")
from kivy.app import App
from kivy.clock import Clock
from kivy.config import Config
from kivy.uix.floatlayout import FloatLayout
from kivy.uix.image import Image
from kivy.graphics import Color
from Model.Enum.Direction import Direction
from Model.Enum.Speed import Speed
from Model.Enum.State import State
from Model.Enum.RespawnState import RespawnState
from Model.Path import Path
from Model.Sprite import Sprite
from Model.TDIndicator import TDIndicator
from Model.TDSquare import TDSquare
Config.set('graphics', 'width', '1200')
Config.set('graphics', 'height', '500')
from kivy.core.window import Window
class RootWidgit(FloatLayout):
# Maze definition files; current_maze_index selects the one that is loaded
mazes = ['Maze/maze1.txt',
'Maze/maze2.txt',
'Maze/maze3.txt']
current_maze_index = 0
# Colours for the termination squares, indexed in step with END_REWARDS
END_COLOURS = [Color(0, 1, 0),
Color(1, 1, 0),
Color(0, 0, 1),
Color(1, 1, 0)]
respawn_state = RespawnState.NORMAL
# Dimensions of the value board (rows x columns of TDSquares)
ROWS = 0
COLS = 0
# Row / column of every termination square (parallel lists)
END_ROWS = []
END_COLS = []
character = None
# Becomes True once the TDSquare child widgets have been added
td_children_flag = False
# NOTE(review): presumably per-square wall flags (N, E, S, W) - confirm
# against _populate_walls
mat_walls = [[[]]]
# Maze Board GridLayout Parameters
maze_board_mat = [[]]
MAZE_BOARD_ROWS = 0
MAZE_BOARD_COLS = 0
# Running count of the widgets added to the maze board
maze_board_children_size = 0
# RL parameters
episodes = 0
# Probability of taking a random move instead of the best one
epsilon = 0.1
discount = 0.9
# Decay factor applied (together with discount) to eligibility traces
_lambda = 0.9
learning_rate = 0.5
# Fixed cost subtracted inside every Q-value update
move_cost = 0.05
def __init__(self, **kwargs):
    '''
    Build the root widget: keep a reference to every widget declared in
    the .kv file, show the starting RL parameters in their labels and
    load the currently selected maze.
    :param kwargs: keyword arguments passed on to the parent initializer
    :return:
    '''
    super(RootWidgit, self).__init__(**kwargs)
    kv_ids = self.ids
    # GridLayouts, Buttons and Labels from the .kv file; each one is
    # stored on self under the same name as its id
    kv_widget_names = (
        'maze_board',
        'value_board',
        'learn_toggle_button',
        'reset_button',
        'speed_button',
        'next_maze_button',
        'random_respawn_button',
        'lambda_increase_button',
        'lambda_decrease_button',
        'learning_rate_increase_button',
        'learning_rate_decrease_button',
        'episode_label',
        'epsilon_label',
        'lambda_label',
        'discount_label',
        'learning_rate_label',
    )
    for widget_name in kv_widget_names:
        setattr(self, widget_name, getattr(kv_ids, widget_name))
    # Set progress label text
    initial_texts = (
        (self.episode_label, self.episodes),
        (self.epsilon_label, self.epsilon),
        (self.discount_label, self.discount),
        (self.lambda_label, self._lambda),
        (self.learning_rate_label, self.learning_rate),
    )
    for label, value in initial_texts:
        label.text = str(value)
    # Pass in the maze .txt file to set up
    self._setup_maze(self.mazes[self.current_maze_index])
def callback_setup(self, dt):
    '''
    Finish the part of the setup that needs the GridLayouts to have
    computed their geometry: measure the walk length, place the
    character, add the TDSquare children (first call only) and hook up
    the keyboard and the buttons.
    :param dt: not used by this function
    :return:
    '''
    self._get_walk_length()
    self._place_character()
    # The indicator / arrow widgets are only created once
    if self.td_children_flag is False:
        self._add_TDSquare_children()
        self.td_children_flag = True
    # Request the keyboard and listen for key presses
    keyboard = Window.request_keyboard(self._keyboard_closed, self, 'text')
    self._keyboard = keyboard
    keyboard.bind(on_key_down=self._on_keyboard_down)
    # Buttons from the .kv file paired with their press handlers
    press_handlers = (
        (self.learn_toggle_button, self._learn_toggle),
        (self.reset_button, self._reset),
        (self.speed_button, self._toggle_speed),
        (self.next_maze_button, self._next_maze),
        (self.random_respawn_button, self._toggle_respawn_state),
        (self.lambda_increase_button, self._increase_lambda),
        (self.lambda_decrease_button, self._decrease_lambda),
        (self.learning_rate_increase_button, self._increase_learning_rate),
        (self.learning_rate_decrease_button, self._decrease_learning_rate),
    )
    for button, handler in press_handlers:
        button.bind(on_press=handler)
def learn_q_lambda(self, dt):
    '''
    Perform one learning step of Q-lambda with a backward-view
    eligibility trace: pick an action, animate it, compute the Q update
    and spread it over the value board.
    :param dt: not used by this function
    :return:
    '''
    # Termination state: the character reached an end square, so only
    # reset it; no learning step happens on this call
    if self._check_termination_square(self.character.current_row,
                                      self.character.current_col) is True:
        self._reset_character()
        return
    # Square the character is standing on before it moves
    current_td_square = self._get_td_square(self.character.current_row,
                                            self.character.current_col)
    # action_index is the move that will be taken,
    # best_action_index is the "best" move at the moment
    action_index, best_action_index = self._determine_action(current_td_square)
    # SPECIAL CASE
    # From the initial position the character must not move upward,
    # so keep drawing a random action until it is not NORTH
    at_initial_square = (self.character.current_row == self.INITIAL_ROW and
                         self.character.current_col == self.INITIAL_COL)
    if at_initial_square:
        while action_index == Direction.NORTH.value:
            action_index = random.randint(0, 3)
    # IMPORTANT: once _animate returns, the character's row and column
    # already describe the square it moved to
    valid_flag = self._animate(action_index)
    new_td_square = self._get_td_square(self.character.current_row,
                                        self.character.current_col)
    # valid_flag tells whether the AI hit a wall or not
    q_val = self._calculate_q_val(current_td_square, new_td_square,
                                  action_index, valid_flag)
    # Increase eligibility trace of the action just taken
    current_td_square.eligibility_trace[action_index] += 1
    # Update values in accordance to Q-lambda
    self._calculate_update_q_lambda(q_val, action_index, best_action_index)
    # Update the image and color
    self._color_trace()
def _add_TDSquare_children(self):
    '''
    Give every square of the value board a TDIndicator and an (initially
    invisible) Image child, then turn the termination squares into plain
    coloured squares that show their reward as text.
    :return:
    '''
    # Add indicator and arrow image to each td_square of the value_board
    for td_square in self.value_board.children:
        td_indicator = TDIndicator(x=td_square.x, y=td_square.y,
                                   size=td_square.size)
        # opacity=0 keeps the image hidden when it is created
        image = Image(x=td_square.x, y=td_square.y,
                      size=td_square.size, opacity=0)
        td_square.add_widget(td_indicator)
        td_square.add_widget(image)
    # Termination squares get no indicator/image, only reward text
    for x, reward in enumerate(self.END_REWARDS):
        td_square = self._get_td_square(self.END_ROWS[x],
                                        self.END_COLS[x])
        td_square.clear_widgets()
        # Set the text color and size
        td_square.disabled_color = [0, 0, 0, 1]
        td_square.halign = 'center'
        td_square.font_size = '25sp'
        if reward > 0:
            td_square.text = '+{}'.format(reward)
            # Copy before appending the alpha so the Color object's own
            # value can never be modified through the returned list
            end_colour = list(self.END_COLOURS[x].rgb)
            end_colour.append(0.75)
            td_square.background_color = end_colour
        else:
            td_square.text = '{}'.format(reward)
            td_square.background_color = [1, 0, 0, .75]
def _assign_rewards(self):
    '''
    - Store the reward (and, for positive rewards, the colour) on every
      termination square
    - Put a large negative Q-value on NORTH of the initial square so
      that the AI won't move off the map
    :return:
    '''
    for idx, reward in enumerate(self.END_REWARDS):
        end_square = self._get_td_square(self.END_ROWS[idx], self.END_COLS[idx])
        end_square.reward = reward
        # Only positively rewarded squares receive a colour
        if end_square.reward > 0:
            end_square.colour = self.END_COLOURS[idx]
    # Make moving up from the initial square the worst option
    initial_square = self._get_td_square(self.INITIAL_ROW, self.INITIAL_COL)
    initial_square.direction_values[Direction.NORTH.value] = -100
def _animate(self, max_index):
    '''
    - Pick the animation that belongs to the move max_index, store it on
      self.animate and start it on the character.
    - Returns a valid_flag: False when the move bumped into a wall,
      True otherwise.
    :param max_index: integer, value of the Direction to move in
    :return: Boolean
    '''
    valid_flag = True
    candidates = (Direction.NORTH, Direction.EAST,
                  Direction.SOUTH, Direction.WEST)
    for direction in candidates:
        if direction.value != max_index:
            continue
        valid_move = self._valid_move(self.character.current_row,
                                      self.character.current_col,
                                      direction)
        if valid_move:
            # Get animation for walking
            self.animate = self.character.get_walk_animation(direction)
        elif valid_move is False:
            # Get animation for wall_bump
            self.animate = self.character.get_bump_wall_animation(direction)
            valid_flag = False
        # Any other result of _valid_move leaves self.animate untouched
        break
    self.animate.bind(on_complete=self._end_animation)
    self.animate.start(self.character)
    return valid_flag
def _calculate_q_val(self, current_td_square, new_td_square, action_index, valid_flag):
    '''
    Compute the learning-rate-scaled Q update for taking action_index
    on current_td_square and arriving on new_td_square.
    :param current_td_square: TDSquare the move started from
    :param new_td_square: TDSquare the move ended on
    :param action_index: integer, chosen action
    :param valid_flag: boolean, whether the AI hit a wall; not used here
    :return: float
    '''
    # Present estimate for the action that was taken
    old_estimate = current_td_square.direction_values[action_index]
    # Best estimate available from the square that was reached
    best_next = max(new_td_square.direction_values)
    # Reward of the new square plus discounted lookahead, minus the old
    # estimate and the fixed cost of a move
    difference = new_td_square.reward + self.discount*best_next - old_estimate - self.move_cost
    return self.learning_rate * difference
def _calculate_update_q(self, current_td_square, new_td_square, index, valid_flag):
    '''
    Plain Q-learning step: adjust the value of the action `index` on
    current_td_square in place, based on the square that was reached.
    :param current_td_square: TDSquare the move started from
    :param new_td_square: TDSquare the move ended on
    :param index: integer, action whose value is updated
    :param valid_flag: boolean; not used here
    :return:
    '''
    values = current_td_square.direction_values
    old_estimate = values[index]
    best_next = max(new_td_square.direction_values)
    # Q-learning equation
    step = self.learning_rate * (
        new_td_square.reward + self.discount * best_next - old_estimate - self.move_cost)
    values[index] += step
def _calculate_update_q_lambda(self, q_val, action_index, best_index):
    '''
    Spread one Q-lambda update over every td_square of the value_board:
    each direction value moves by q_val weighted with its eligibility
    trace, then the trace is decayed (the action taken was the "best"
    one) or cleared (it was not).
    :param q_val: float, update computed by _calculate_q_val; it already
                  includes the learning rate, so it is not scaled again
    :param action_index: int, action that was taken
    :param best_index: int, "best" action at the time of choosing
    :return:
    '''
    took_best_action = action_index == best_index
    # Factor applied to every trace after a "best" move
    trace_decay = self.discount * self._lambda
    for td_square in self.value_board.children:
        for direction in Direction:
            slot = direction.value
            elig_trace_val = td_square.eligibility_trace[slot]
            # learning_rate is deliberately absent here: multiplying by
            # it a second time would square the effective step size
            td_square.direction_values[slot] += q_val * elig_trace_val
            if took_best_action:
                td_square.eligibility_trace[slot] = trace_decay * elig_trace_val
            else:
                td_square.eligibility_trace[slot] = 0
def _change_position(self, row, col, current_direction):
    '''
    Return the row and column one step away from (row, col) in the
    given direction; anything that is not one of the four directions
    leaves the position unchanged.
    :param row: int
    :param col: int
    :param current_direction: Direction (enum)
    :return: row, col (int, int)
    '''
    steps = ((Direction.NORTH, -1, 0),
             (Direction.EAST, 0, 1),
             (Direction.SOUTH, 1, 0),
             (Direction.WEST, 0, -1))
    for direction, row_step, col_step in steps:
        if current_direction is direction:
            return row + row_step, col + col_step
    return row, col
def _check_termination_square(self, row, col):
    '''
    Tell whether (row, col) is one of the terminating squares listed in
    END_ROWS / END_COLS.
    :param row: int
    :param col: int
    :return: boolean
    '''
    # END_ROWS and END_COLS are parallel lists, so walk them pairwise
    return any(row == end_row and col == end_col
               for end_row, end_col in zip(self.END_ROWS, self.END_COLS))
def _color_trace(self):
    '''
    Recolour the whole value board: every td_square gets the colour
    that _trace reports for its position and is then refreshed.
    :return:
    '''
    positions = [(row, col)
                 for row in range(self.ROWS)
                 for col in range(self.COLS)]
    for row, col in positions:
        # Colour found by tracing from this position
        traced_colour = self._trace(row, col)
        square = self._get_td_square(row, col)
        square.colour = traced_colour
        square.set_TDIndicator_color(traced_colour)
        # Update the td_square to show the changes
        square.update()
def _decrease_lambda(self, dt):
    '''
    Lower lambda by one step of 0.1 (dropping to 0.0 once it is at or
    below 0.11) and show the new value in the lambda label.
    :param dt: not used; supplied by the on_press binding
    :return:
    '''
    if self._lambda > 0.11:
        # Round to one decimal so repeated steps do not pile up float
        # error (0.8 - 0.1 would otherwise show 0.7000000000000001)
        self._lambda = round(self._lambda - 0.1, 1)
    else:
        self._lambda = 0.0
    self.lambda_label.text = str(self._lambda)
def _decrease_learning_rate(self, dt):
    '''
    Lower the learning rate by one step of 0.1 (dropping to 0.0 once it
    is at or below 0.12) and show the new value in its label.
    :param dt: not used; supplied by the on_press binding
    :return:
    '''
    if self.learning_rate > 0.12:
        # Round to one decimal so repeated steps do not pile up float error
        self.learning_rate = round(self.learning_rate - 0.1, 1)
    else:
        # Must write learning_rate itself; writing a differently named
        # attribute would leave the real value and the label unchanged
        self.learning_rate = 0.0
    self.learning_rate_label.text = str(self.learning_rate)
def __determine_action(self, current_td_square):
    '''
    Epsilon-greedy choice of the next move. With probability epsilon a
    random action is taken, otherwise the first action holding the
    highest value.
    :param current_td_square: the td_square the character is currently on
    :return: (action_index, best_action_index)
    '''
    values = current_td_square.direction_values
    # Draw decides between a random move and the "best" move
    take_random_move = random.uniform(0, 1) < self.epsilon
    if take_random_move:
        action_index = random.randint(0, 3)
    else:
        action_index = values.index(max(values))
    # The "best" move is reported no matter which move was chosen
    best_action_index = values.index(max(values))
    return action_index, best_action_index
def _determine_action(self, current_td_square):
    '''
    Epsilon-greedy choice of the next move. With probability epsilon a
    random action is taken; otherwise one of the actions sharing the
    highest Q-value is picked at random.
    :param current_td_square: TDSquare
    :return: (action_index, best_action_index)
    '''
    # Q-values of the state (N,E,S,W) and the highest of them
    q_values = current_td_square.direction_values
    top_value = max(q_values)
    if random.uniform(0, 1) < self.epsilon:
        # Case where we take random move
        action_index = random.randint(0, 3)
    else:
        # Several moves can share the top value (initially all valid
        # moves are 0), so choose randomly among the tied ones
        tied = [slot for slot, value in enumerate(q_values)
                if value == top_value]
        action_index = tied[random.randint(0, len(tied)-1)]
    # A chosen action that holds the top value is itself the best one
    if q_values[action_index] == top_value:
        return action_index, action_index
    return action_index, q_values.index(top_value)
def _end_animation(self, widget, item):
    '''
    - Bound to the animation's on_complete event: puts the character
      back into its standing state once the animation is over.
    - Binds the keyboard again (it is unbound while a manual move is
      being animated).
    - Starts the next learning step while the character is LEARNING.
    :param widget: not used
    :param item: not used
    :return:
    '''
    character = self.character
    character.set_standing()
    self._keyboard.bind(on_key_down=self._on_keyboard_down)
    keep_learning = self.character.state is State.LEARNING
    if keep_learning:
        self.learn_q_lambda(None)
def _get_best_direction(self, row, col):
    '''
    Return the Direction holding the highest Q-value on the td_square
    at (row, col); the first one wins when several are tied.
    :param row: int
    :param col: int
    :return: Direction (enum), or None if the index matches no direction
    '''
    q_values = self._get_td_square(row, col).direction_values
    best_slot = q_values.index(max(q_values))
    for direction in (Direction.NORTH, Direction.EAST,
                      Direction.SOUTH, Direction.WEST):
        if best_slot == direction.value:
            return direction
    return None
def _get_child_index_maze_board(self, row, col):
    '''
    Return the index into maze_board.children of the path square at
    (row, col). Path squares sit on the odd rows and odd columns of the
    maze board, with wall widgets in between.
    :param row: int
    :param col: int
    :return: int
    '''
    # Position of the path square on the wall-interleaved board
    board_row = 2 * row + 1
    board_col = (2 * col + 1) % self.MAZE_BOARD_COLS
    # The board is filled row by row with MAZE_BOARD_COLS widgets per
    # row, so the number of columns is the stride between rows
    pos = board_row * self.MAZE_BOARD_COLS + board_col
    # Position has to be readjusted because child[0]
    # starts on bottom right
    return self.maze_board_children_size - pos - 1
def _get_child_index_value_board(self, row, col):
    '''
    Return the index into value_board.children of the td_square at
    (row, col). Needed because the GridLayout children start [0] at the
    bottom right.
    :param row: integer
    :param col: integer
    :return: integer, child_index
    '''
    # The board is filled row by row with COLS squares per row, so the
    # number of columns is the stride between rows
    pos = row * self.COLS + col % self.COLS
    # Flip the position because the children list is in reverse order
    value_board_children_size = self.ROWS * self.COLS
    return value_board_children_size - pos - 1
def _get_td_square(self, row, col):
    '''
    Look up the td_square of the value board that sits at the given
    row and column.
    :param row: int
    :param col: int
    :return: TDSquare
    '''
    squares = self.value_board.children
    return squares[self._get_child_index_value_board(row, col)]
def _get_walk_length(self):
    '''
    Measure how far apart neighbouring squares are on the maze board
    and store the distances on the character as walk_length_x and
    walk_length_y.
    :return:
    '''
    board_children = self.maze_board.children
    index_of = self._get_child_index_maze_board
    start_row, start_col = self.INITIAL_ROW, self.INITIAL_COL
    # Initial square, the square one column before it and the square
    # one row after it
    start_square = board_children[index_of(start_row, start_col)]
    previous_col_square = board_children[index_of(start_row, start_col - 1)]
    next_row_square = board_children[index_of(start_row + 1, start_col)]
    # Walk lengths are the differences of the widget positions
    self.character.walk_length_x = start_square.pos[0] - previous_col_square.pos[0]
    self.character.walk_length_y = start_square.pos[1] - next_row_square.pos[1]
def _handle_keyboard_action(self, keycode):
    '''
    Translate a w/a/s/d key press into a move attempt and store the
    matching animation on self.animate.
    The validity of a move has three outcomes:
      True:  there is no wall, the character walks
      False: there is a wall, the character "bumps" into it
      None:  the character would be out of bound
    :param keycode: key information of the event; keycode[1] is compared
                    against the key letters
    :return: (animate_flag, valid_move, action_index)
    '''
    key_bindings = (('w', Direction.NORTH),
                    ('a', Direction.WEST),
                    ('s', Direction.SOUTH),
                    ('d', Direction.EAST))
    direction = None
    for key, bound_direction in key_bindings:
        if keycode[1] == key:
            direction = bound_direction
            break
    # Any other key: nothing to animate
    if direction is None:
        return False, False, 0
    # The character cannot move upward from the initial square
    if direction is Direction.NORTH and \
            self.character.current_row == self.INITIAL_ROW and \
            self.character.current_col == self.INITIAL_COL:
        return False, False, False
    valid_move = self._valid_move(self.character.current_row,
                                  self.character.current_col,
                                  direction)
    if valid_move:
        # Get animation for walking
        self.animate = self.character.get_walk_animation(direction)
    elif valid_move is False or direction is Direction.EAST:
        # Get animation for wall_bump
        # NOTE(review): EAST bumps on any falsy result while the other
        # directions bump only on an explicit False - confirm whether
        # this difference is intended
        self.animate = self.character.get_bump_wall_animation(direction)
    return True, valid_move, direction.value
def _increase_lambda(self, dt):
    '''
    Raise lambda by one step of 0.1 while it is below 0.99 and show the
    value in the lambda label.
    :param dt: not used; supplied by the on_press binding
    :return:
    '''
    if self._lambda < 0.99:
        # Round to one decimal so repeated steps do not pile up float
        # error (0.2 + 0.1 would otherwise show 0.30000000000000004)
        self._lambda = round(self._lambda + 0.1, 1)
    self.lambda_label.text = str(self._lambda)
def _increase_learning_rate(self, dt):
    '''
    Raise the learning rate by one step of 0.1 while it is below 0.99
    and show the value in its label.
    :param dt: not used; supplied by the on_press binding
    :return:
    '''
    if self.learning_rate < 0.99:
        # Round to one decimal so repeated steps do not pile up float error
        self.learning_rate = round(self.learning_rate + 0.1, 1)
    self.learning_rate_label.text = str(self.learning_rate)
def _keyboard_closed(self):
    '''
    Stop listening for key presses on the keyboard and drop the
    reference to it.
    :return:
    '''
    closing_keyboard = self._keyboard
    closing_keyboard.unbind(on_key_down=self._on_keyboard_down)
    self._keyboard = None
def _learn_toggle(self, dt):
    '''
    Toggle between the AI learning on its own and the user moving the
    character manually; the toggle button's background shows the mode.
    :param dt: not used
    :return:
    '''
    character = self.character
    toggle_button = self.learn_toggle_button
    if character.state == State.LEARNING:
        # Back to manual control
        character.state = State.MANUAL
        toggle_button.background_color = [1, 1, 1, 1]
        return
    if character.state == State.MANUAL:
        character.state = State.LEARNING
        toggle_button.background_color = [0, 0, 1, 1]
        # Start Learning
        self.learn_q_lambda(None)
def _next_maze(self, dt):
    '''
    Select the next maze (wrapping around after the last one), show its
    number on the button and reset.
    :param dt: not used
    :return:
    '''
    following_index = (self.current_maze_index + 1) % len(self.mazes)
    self.current_maze_index = following_index
    # The button shows a 1-based maze number
    self.next_maze_button.text = 'Maze: {}'.format(following_index + 1)
    self._reset(None)
def _on_keyboard_down(self, keyboard, keycode, text, modifiers):
    '''
    Keyboard handler that moves the character with the keys w, a, s, d.
    Key presses are ignored while the AI is learning.
    :param keyboard: not used
    :param keycode: key information passed on to _handle_keyboard_action
    :param text: not used
    :param modifiers: not used
    :return: True, to accept the key (otherwise it would be used by the
             system)
    '''
    # Position before the move
    row_before = self.character.current_row
    col_before = self.character.current_col
    # If AI is learning, make sure it won't be interrupted
    if self.character.state is State.LEARNING:
        return True
    animate_flag, valid_move, action_index = self._handle_keyboard_action(keycode)
    # Nothing else to do unless one of the desired keys was pressed
    if animate_flag is not True:
        return True
    self.animate.bind(on_complete=self._end_animation)
    self.animate.start(self.character)
    # Unbind keyboard to stop action in middle of animation
    self._keyboard.unbind(on_key_down=self._on_keyboard_down)
    if valid_move is True:
        # Update the value_board accordingly
        self._update_value_board(valid_move, row_before, col_before, action_index)
    return True
def _opposite_directions(self, direction_1, direction_2):
    '''
    Tell whether the two directions point opposite ways
    (NORTH/SOUTH or EAST/WEST, in either order).
    :param direction_1: Direction (enum)
    :param direction_2: Direction (enum)
    :return: boolean
    '''
    opposite_pairs = ((Direction.NORTH, Direction.SOUTH),
                      (Direction.EAST, Direction.WEST),
                      (Direction.SOUTH, Direction.NORTH),
                      (Direction.WEST, Direction.EAST))
    for first, second in opposite_pairs:
        if direction_1 is first and direction_2 is second:
            return True
    return False
def _place_character(self):
    '''
    Size the character to 80% of its maze square, centre it on that
    square and add it to this widget.
    :return:
    '''
    character = self.character
    cell_index = self._get_child_index_maze_board(character.current_row,
                                                  character.current_col)
    # Size of the gridlayout square the character stands on
    cell_size = self.maze_board.children[cell_index].size
    # Sprite size as a ratio of the actual square
    ratio = 0.8
    sprite_width = cell_size[0] * ratio
    sprite_height = cell_size[1] * ratio
    # Position of the square itself
    cell_pos = self.maze_board.children[cell_index].pos
    # Half the square minus half the sprite centres the sprite
    x = cell_pos[0] + cell_size[0] / 2 - sprite_width / 2
    y = cell_pos[1] + cell_size[1] / 2 - sprite_height / 2
    character.size = [sprite_width, sprite_height]
    character.pos = [x, y]
    self.add_widget(character)
def _populate_maze_board(self):
    '''
    Fill the maze_board GridLayout from the .kv file with disabled Path
    widgets. Odd rows alternate wall / path squares, even rows consist
    of walls only. maze_board_children_size counts the widgets added.
    :return: None
    '''
    # Start counting the maze_board's children from 0 again
    self.maze_board_children_size = 0
    # setting columns for the GridLayout
    self.maze_board.cols = self.MAZE_BOARD_COLS

    def add_disabled(cell):
        # Paths and walls are Buttons which need to be disabled
        cell.disabled = True
        self.maze_board_children_size += 1
        self.maze_board.add_widget(cell)

    for board_row in range(self.MAZE_BOARD_ROWS):
        if board_row % 2 == 1:
            # Row that holds the actual path squares, with a wall
            # between neighbouring squares
            for board_col in range(self.MAZE_BOARD_COLS):
                if board_col % 2 == 1:
                    add_disabled(Path())
                else:
                    add_disabled(Path(wall=True, size_hint_x=0.1, size_hint_y=1))
        else:
            # Row made of walls only
            for _ in range(self.maze_board.cols):
                add_disabled(Path(wall=True, size_hint_x=0.1, size_hint_y=0.1))
def _populate_value_board(self):
    '''
    Fill the value board (GridLayout) with ROWS x COLS disabled
    TDSquare widgets.
    :return:
    '''
    board = self.value_board
    # Set up the columns
    board.cols = self.COLS
    for _row in range(self.ROWS):
        for _col in range(self.COLS):
            square = TDSquare()
            # TDSquares are buttons that need to be disabled
            square.disabled = True
            board.add_widget(square)
def _populate_walls(self):
'''
This function serves the purpose of populating walls and the
intermediate paths between squares. This is done using the
mat_wall matrix which tells you whether there is a wall NORTH,
EAST, SOUTH, or WEST.
IMPORTANT: The walls to be filled are children of the maze_board