This repository has been archived by the owner on Jan 17, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
model.py
67 lines (55 loc) · 3.44 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import tensorflow as tf
import tensorflow.contrib.layers as layers
def model(img_in, num_actions, scope, reuse=False):
"""As described in https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf"""
with tf.variable_scope(scope, reuse=reuse):
out = img_in
with tf.variable_scope("convnet"):
# original architecture
out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
out = layers.flatten(out)
with tf.variable_scope("action_value"):
out = layers.fully_connected(out, num_outputs=512, activation_fn=tf.nn.relu)
out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
return out
def dueling_model(img_in, num_actions, scope, reuse=False):
"""As described in https://arxiv.org/abs/1511.06581"""
with tf.variable_scope(scope, reuse=reuse):
out = img_in
with tf.variable_scope("convnet"):
# original architecture
out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
out = layers.flatten(out)
with tf.variable_scope("state_value"):
state_hidden = layers.fully_connected(out, num_outputs=512, activation_fn=tf.nn.relu)
state_score = layers.fully_connected(state_hidden, num_outputs=1, activation_fn=None)
with tf.variable_scope("action_value"):
actions_hidden = layers.fully_connected(out, num_outputs=512, activation_fn=tf.nn.relu)
action_scores = layers.fully_connected(actions_hidden, num_outputs=num_actions, activation_fn=None)
action_scores_mean = tf.reduce_mean(action_scores, 1)
action_scores = action_scores - tf.expand_dims(action_scores_mean, 1)
return state_score + action_scores
def bootstrap_model(img_in, num_actions, scope, reuse=False):
""" As described in https://arxiv.org/abs/1602.04621"""
with tf.variable_scope(scope, reuse=reuse), tf.device("/gpu:0"):
out = img_in
with tf.variable_scope("convnet"):
# original architecture
out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
out = layers.flatten(out)
out_list =[]
with tf.variable_scope("heads"):
for _ in range(10):
scope_net = "action_value_head_" + str(_)
with tf.variable_scope(scope_net):
out_temp = out
out_temp = layers.fully_connected(out_temp, num_outputs=512, activation_fn=tf.nn.relu)
out_temp = layers.fully_connected(out_temp, num_outputs=num_actions, activation_fn=None)
out_list.append(out_temp)
return out_list