-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmatch.py
161 lines (113 loc) · 4.22 KB
/
match.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# -*- coding: utf-8 -*-
from sys import stdout
from game import FizzBuzzGame, FEATURE_SIZE, PLAYER_NB, DRAW_NB
import numpy as np
import search
class Feed(object):
    """Rolling store of training samples (feature, prob, result).

    Samples are grouped into generations.  Every ``append`` is counted in
    the newest generation; every ``forget`` drops the samples belonging to
    the oldest generation and opens a new, empty one.
    """

    def __init__(self, generations=10):
        """Create an empty feed.

        Args:
            generations: number of generation counters kept in
                ``forget_size``.  A sample is discarded by the
                ``generations``-th ``forget`` call made after it was
                appended.  Defaults to 10, the previously fixed value.

        Raises:
            ValueError: if ``generations`` is smaller than 1.
        """
        if generations < 1:
            raise ValueError("generations must be >= 1")
        self._feature = []
        self._prob = []
        self._result = []
        self.size = 0
        # forget_size[k] = number of samples appended k forget() calls ago
        self.forget_size = [0] * generations

    def append(self, f_, p_, r_):
        """Add one sample (feature, prob, result) to the newest generation."""
        self._feature.append(f_)
        self._prob.append(p_)
        self._result.append(r_)
        self.size += 1
        self.forget_size[0] += 1

    def forget(self):
        """Drop the samples of the oldest generation and start a new one."""
        expired = self.forget_size.pop()
        # the oldest samples sit at the front of each list; trim in place
        del self._feature[:expired]
        del self._prob[:expired]
        del self._result[:expired]
        self.forget_size.insert(0, 0)
        self.size -= expired

    def get(self):
        """Return (features, probs, results) as stacked float32 numpy arrays.

        Raises:
            ValueError: raised by ``np.stack`` when the feed holds no sample.
        """
        return (np.stack(self._feature).astype(np.float32),
                np.stack(self._prob).astype(np.float32),
                np.stack(self._result).astype(np.float32))
class FeedPicker(object):
    """Random mini-batch sampler over a snapshot of a feed."""

    def __init__(self, feed_):
        """Snapshot ``feed_.get()`` and prepare a shuffled index order."""
        features, probs, results = feed_.get()
        self._feature = features
        self._prob = probs
        self._result = results
        self.size = self._feature.shape[0]
        self._idx = 0
        self._perm = np.arange(self.size)
        np.random.shuffle(self._perm)

    def next_batch(self, batch_size=100):
        """Return the next (feature, prob, result) mini-batch.

        When fewer than ``batch_size`` unread samples remain, the order is
        reshuffled and reading restarts from the beginning; the unread tail
        of the previous pass is skipped.
        """
        remaining = self.size - self._idx
        if batch_size > remaining:
            np.random.shuffle(self._perm)
            self._idx = 0

        begin = self._idx
        stop = begin + batch_size
        self._idx = stop

        # one index slice shared by the three arrays keeps samples aligned
        picked = self._perm[begin:stop]
        return self._feature[picked], self._prob[picked], self._result[picked]
def feed_match(feed, match_cnt, search_limit, ckpt_path,
               initial_life=1, use_gpu=True, gpu_idx=0,
               reuse=False, show_info=True):
    """Play ``match_cnt`` games with the search tree and record them in ``feed``.

    For every move the game feature and the normalized root visit counts are
    appended to ``feed``; after each game a leaf sample is appended and the
    game's result is written back over that game's samples with alternating
    sign.  A summary line is printed and appended to ``match_log.txt``.

    Args:
        feed: sample store; must provide ``forget()``, ``append()`` and a
            ``_result`` list.
        match_cnt: number of games to play.
        search_limit: limit forwarded to ``tree.search``.
        ckpt_path: checkpoint path forwarded to ``search.Tree``.
        initial_life: forwarded to ``FizzBuzzGame``.
        use_gpu, gpu_idx, reuse: forwarded to ``search.Tree``.
        show_info: when true, search info is shown for the first game only.

    Returns:
        Percentage of moves judged legal by ``fbg.legal``; 0.0 when no move
        was played.
    """
    # delete old feed
    feed.forget()

    tree = search.Tree(ckpt_path, use_gpu, gpu_idx, reuse)
    fbg = FizzBuzzGame(initial_life)
    prob_leaf = np.full((FEATURE_SIZE), 0.0)

    correct_cnt = 0
    play_cnt = 0
    lengths = []

    print("")
    for i in range(match_cnt):
        fbg.clear()
        tree.clear()
        # show log only in the first match
        show_info = show_info and i == 0

        continue_game = True
        while continue_game and fbg.next_num <= DRAW_NB:
            # third argument is presumably use_dirichlet -- TODO confirm
            num = tree.search(fbg, search_limit, True, show_info)

            # turn root visit counts into a distribution (kept as-is if all 0)
            prob = tree.node[tree.root_id].visit_cnt
            visit_total = prob.sum()
            if visit_total != 0:
                prob = prob.astype(float) / visit_total
            feed.append(fbg.feature(), prob, 0)

            if fbg.legal(num, fbg.next_num):
                correct_cnt += 1
            play_cnt += 1
            continue_game = fbg.play(num)

        stdout.write("\r%03d/%03d games" % (i + 1, match_cnt))
        stdout.flush()

        is_draw = np.count_nonzero(fbg.lives) == PLAYER_NB
        continue_length = DRAW_NB if is_draw else fbg.next_num - 2
        lengths.append(continue_length)

        result = 0 if is_draw else 1
        feed.append(fbg.feature(), prob_leaf, result)
        # walk this game's samples from newest to oldest, flipping the sign
        # at every step
        for offset in range(1, fbg.next_num + 1):
            feed._result[-offset] = result
            result = -result

    print("")
    # guard against match_cnt == 0 / no move played instead of dividing by 0
    accuracy = float(correct_cnt) / play_cnt * 100 if play_cnt else 0.0  # percent
    ave_length = float(sum(lengths)) / len(lengths) if lengths else 0.0
    print("match: accuracy=%.1f[%%] average length=%.1f" % (
        accuracy, ave_length))

    # context manager closes the log even if the write fails
    with open("match_log.txt", "a") as log_file:
        log_file.write("%.2f\t%.2f\n" % (accuracy, ave_length))

    return accuracy
def test_match(use_gpu, search_limit, initial_life=1, reuse=False, show_info=False):
    """Play one demonstration game and print its record.

    The tree is built from the fixed checkpoint path ``ckpt/model`` with GPU
    index 0, and searches with ``use_dirichlet=False``.  Moves are played
    until ``fbg.play`` returns a falsy value.
    """
    print("\n<test game>")

    tree = search.Tree("ckpt/model", use_gpu, 0, reuse=reuse)
    fbg = FizzBuzzGame(initial_life)

    while True:
        chosen = tree.search(fbg, search_limit,
                             use_dirichlet=False, show_info=show_info)
        if not fbg.play(chosen):
            break

    fbg.print_record()