-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCubiCupNode.py
188 lines (149 loc) · 6.97 KB
/
CubiCupNode.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
from math import sqrt
from math import log
import CubiCupState
import CubiCupNode
from CubiCupDriver import BLUE
from CubiCupDriver import GREEN
import Network
class Node:
def __init__(self, parent, state, probability=1, moveProbFunc=None):
self.parent = parent
self.state = state
self.score = 0
self.sims = 0
self.explore = 2
self.terminalChild = None
self.actionFor = state.lastTurn()
self.probability = probability
self.children = [None for x in range(len(state.availableMoves))]
self.childrenUnexplored = len(self.children)
self.moveProbFunc = moveProbFunc
if moveProbFunc is not None:
self.childProbs = Network.getMoveProbs(self.state, moveProbFunc)
else:
self.childProbs = None
if self.state.gameOver:
self.isTerminal = True
self.terminalValue = state.endValue
self.terminalScore = state.endValue[self.state.turn]
else:
self.isTerminal = False
self.terminalValue = None
self.terminalScore = -float("inf")
# Create a child from the ith available move
def createChildAt(self, i):
# Copy current state
newChildState = CubiCupState.State(self.state.boardSize, self.state)
# Update the new state with the ith available move
newChildState.takeTurn(self.state.availableMoves[i])
# Create new node with new state, listing this node as parent
if self.childProbs is None:
self.children[i] = CubiCupNode.Node(self, newChildState, moveProbFunc=self.moveProbFunc)
else:
self.children[i] = CubiCupNode.Node(self, newChildState, probability=self.childProbs[i], moveProbFunc=self.moveProbFunc)
# This node has a child, it is no longer a leaf
self.childrenUnexplored -= 1
# Check to see if this node is terminal, terminal is defined by:
# 1) A child can result in a forced win for this node, terminal win
# 2) All children are terminal
# This attempts to find the best win available, but this doesn't guarantee we find the
# best win, we could get the greatest terminal score that is a win, but if a better move
# has not been determined to be terminal yet, we can't find it.
def checkForTerminal(self):
allChildrenTerminal = True # Assume all nodes are terminal
# Loop through all children
for child in self.children:
if child is not None and child.isTerminal:
# If child has better terminal value, save it
if child.terminalValue[child.actionFor] >= self.terminalScore:
self.terminalChild = child
self.terminalValue = child.terminalValue
self.terminalScore = child.terminalValue[child.actionFor]
else:
# Non-existent child or non terminal child
allChildrenTerminal = False
# All child nodes are terminal or we found a terminal win
if allChildrenTerminal or self.terminalScore >= 1:
self.isTerminal = True
def updateWith(self, sims, endValue):
self.sims += sims
self.score += endValue[self.actionFor]
# This method finds the best available move for this node, attempts to maximize win chance
def getBestChild(self):
# If this node is terminal, the terminal child maximizes our score, so if it is
# greater than 0, it's either a forced win for this node, or there is a tie
# with every other being either a tie or a loss. So if we are greater than 0,
# that's the best we can do, otherwise just uses the win chance and hope
# the other player doesn't see how to force their win
if self.isTerminal and self.terminalScore > 0:
return self.terminalChild
currentMaxWin = -float("inf")
bestChild = None
# Loop through all children to find the one with the greatest win chance
for child in self.children:
if child is not None and child.getWinChance() > currentMaxWin:
currentMaxWin = child.getWinChance()
bestChild = child
return bestChild
# Get the child that has the highest UCT score, if child does not exist, create it
def getChildToExpandIndex(self):
maxUCT = -float("inf")
bestChildIndex = None
for i in range(len(self.children)):
if self.children[i] is None:
if self.childProbs is None:
# No child probs and child does not exist, just greedy explore
return i
else:
# Use UCT without win percent, just explore/probability
UCT = self.explore * self.childProbs[i] * sqrt(log(self.sims))
else:
UCT = self.children[i].getUCT()
if UCT >= maxUCT:
maxUCT = UCT
bestChildIndex = i
return bestChildIndex
def getUCT(self):
# UCT formula as specified by wikipedia
#return (self.score/self.sims) + self.explore * sqrt(log(self.parent.sims) / self.sims)
# UCT based on Alpha Zero, using probability to scale the explore factor seems to make the mose sense
return (self.score/self.sims) + self.explore * self.probability * sqrt(log(self.parent.sims) / self.sims)
def getWinChance(self):
return self.score / max(self.sims, 1)
def getScore(self):
if self.isTerminal:
if self.terminalScore == 0.5:
return 0
elif self.terminalScore >= 1:
if self.state.turn == BLUE:
return 1
else:
return -1
elif self.terminalScore <= 0:
if self.state.turn == BLUE:
return -1
else:
return 1
if self.actionFor == BLUE:
return 2 * self.getWinChance() - 1
else:
return - (2 * self.getWinChance() - 1)
def getMoveProbabilities(self):
size = self.state.boardSize
moveProbs = [[[0 for x in range(size + 1)] for y in range(size + 1)] for z in range(size + 1)]
for i in range(len(self.children)):
child = self.children[i]
move = self.state.availableMoves[i]
# Get x,y,z from move tuple
x = move[0]
y = move[1]
z = move[2]
if child is None:
#print("adding " + str(self.state.availableMoves[i]) + " : " + 0 + " : " + str((self.sims-1)))
#moveProbs.append((self.state.availableMoves[i], 0))
moveProbs[x][y][z] = 0
else:
#print("adding " + str(self.state.availableMoves[i]) + " : " + str(child.sims) + " : " + str((self.sims-1)))
#moveProbs.append((self.state.availableMoves[i], child.sims/(self.sims-1)))
moveProbs[x][y][z] = child.sims/(self.sims-1)
return moveProbs