-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathlibcomyco.py
127 lines (104 loc) · 4.89 KB
/
libcomyco.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import numpy as np
import tensorflow as tf
import tflearn
import time
import warnings
import pool
# Resolution of the random threshold drawn in predict(): an integer in
# [1, RAND_RANGE) is divided by RAND_RANGE to obtain a value in (0, 1).
RAND_RANGE = 1000
# Number of units / filters used by every hidden layer in create_network().
FEATURE_NUM = 128
# Optional Mish activation function. It can be used in place of the
# activation the network currently uses; the original author reports that
# overall performance improves slightly.
def mish(x):
    """Return the Mish activation of tensor ``x``: x * tanh(softplus(x))."""
    smoothed = tf.nn.softplus(x)
    gate = tf.nn.tanh(smoothed)
    return x * gate
class libcomyco(object):
    """Softmax policy network trained by supervised (imitation) learning.

    The network maps a state of shape ``[S_INFO, S_LEN]`` to a probability
    vector over ``A_DIM`` actions.  Training samples are collected with
    ``submit()`` into a project-local ``pool.pool()`` buffer and consumed
    one batch at a time by ``train()``.
    """

    def __init__(self, sess, S_INFO, S_LEN, A_DIM, LR_RATE=1e-4, ID=1):
        """Build the graph, the loss and the optimiser.

        Args:
            sess: TensorFlow session used for every ``run`` call.
            S_INFO: number of rows (feature channels) in a state.
            S_LEN: number of columns (history length) in a state.
            A_DIM: number of discrete actions.
            LR_RATE: learning rate passed to the Adam optimiser.
            ID: identifier used to build ``self.s_name``.
        """
        self.pool_ = pool.pool()
        self.sess = sess
        self.S_INFO = S_INFO
        self.S_LEN = S_LEN
        self.A_DIM = A_DIM
        # NOTE(review): s_name is stored but not used anywhere in this class;
        # create_network() always builds under the fixed scope 'actor'.
        self.s_name = 'actor/' + str(ID)
        self.inputs, self.out = self.create_network()
        # Keep probabilities away from 0 and 1 so tf.log() below stays finite.
        self.real_out = tf.clip_by_value(self.out, 1e-4, 1. - 1e-4)
        # Target action vector for the supervised loss.
        self.y_ = tf.placeholder(shape=[None, A_DIM], dtype=tf.float32)

        # Any loss can be used here.  Alternatives tried by the author:
        # self.core_net_loss = tflearn.objectives.mean_square(
        #     self.real_out, self.y_) + 1e-3 * tf.reduce_sum(tf.multiply(self.real_out, tf.log(self.real_out)))
        # self.core_net_loss = -tf.reduce_sum(self.y_ * tf.log(self.real_out)) + 1e-3 * tf.reduce_sum(tf.multiply(self.real_out, tf.log(self.real_out)))
        # Note: there is a minor mistake in the camera-ready paper; the
        # formula of eq. (4) should be
        #     l_comyco = -log(pi) * A* - beta * entropy
        cross_entropy = tflearn.objectives.categorical_crossentropy(
            self.real_out, self.y_)
        # sum(p * log p) is the negative entropy, so adding it with a
        # positive weight subtracts beta * entropy from the loss.
        neg_entropy = tf.reduce_sum(
            tf.multiply(self.real_out, tf.log(self.real_out)))
        self.core_net_loss = cross_entropy + 1e-3 * neg_entropy

        self.core_train_op = tf.train.AdamOptimizer(
            learning_rate=LR_RATE).minimize(self.core_net_loss)
        self.saver = tf.train.Saver()  # save neural net parameters

    def create_network(self):
        """Create the actor graph and return ``(inputs, out)``.

        Rows 0, 1 and 6 of the state contribute only their last column and
        go through fully connected layers.  Rows 2 and 3 use their whole
        history and rows 4 and 5 their first ``A_DIM`` columns, each through
        a 1-D convolution.  The seven branch outputs are stacked, fed to a
        GRU and projected to a softmax over ``A_DIM`` actions.

        Returns:
            Tuple ``(inputs, out)``: the input placeholder of shape
            ``[None, S_INFO, S_LEN]`` and the softmax output tensor.
        """
        # The layer creation order is kept fixed because tflearn names
        # layers automatically; reordering would rename the variables.
        with tf.variable_scope('actor'):
            inputs = tflearn.input_data(
                shape=[None, self.S_INFO, self.S_LEN])

            split_0 = tflearn.fully_connected(
                inputs[:, 0:1, -1], FEATURE_NUM, activation='relu')
            split_1 = tflearn.fully_connected(
                inputs[:, 1:2, -1], FEATURE_NUM, activation='relu')
            split_2 = tflearn.conv_1d(
                inputs[:, 2:3, :], FEATURE_NUM, 4, activation='relu')
            split_3 = tflearn.conv_1d(
                inputs[:, 3:4, :], FEATURE_NUM, 4, activation='relu')
            split_4 = tflearn.conv_1d(
                inputs[:, 4:5, :self.A_DIM], FEATURE_NUM, 4, activation='relu')
            split_5 = tflearn.conv_1d(
                inputs[:, 5:6, :self.A_DIM], FEATURE_NUM, 4, activation='relu')
            split_6 = tflearn.fully_connected(
                inputs[:, 6:7, -1], FEATURE_NUM, activation='relu')

            split_2_flat = tflearn.flatten(split_2)
            split_3_flat = tflearn.flatten(split_3)
            split_4_flat = tflearn.flatten(split_4)
            split_5_flat = tflearn.flatten(split_5)

            # tf.stack requires every branch to have the same shape
            # (expected to be [batch, FEATURE_NUM] -- TODO confirm for the
            # flattened convolution branches).
            merge_net = tf.stack(
                [split_0, split_1, split_2_flat,
                 split_3_flat, split_4_flat, split_5_flat, split_6], axis=-1)
            # shuffle to fit gru layer: [batch, feature, branch] ->
            # [batch, branch, feature]
            merge_net = tf.transpose(merge_net, [0, 2, 1])
            dense_net_0 = tflearn.gru(
                merge_net, FEATURE_NUM, activation='relu')
            out = tflearn.fully_connected(
                dense_net_0, self.A_DIM, activation='softmax')
            return inputs, out

    def predict(self, state):
        """Run the network on ``state`` and sample an action.

        Args:
            state: array-like reshapeable to ``(-1, S_INFO, S_LEN)``.

        Returns:
            Tuple ``(action_prob, bit_rate)``: the clipped probabilities
            returned by the session and the sampled action index.
        """
        action_prob = self.sess.run(self.real_out, feed_dict={
            self.inputs: np.reshape(state, (-1, self.S_INFO, self.S_LEN))
        })
        # randomly picks an action: first index whose cumulative probability
        # exceeds a random threshold in {1, ..., RAND_RANGE - 1} / RAND_RANGE.
        # NOTE(review): np.cumsum flattens its input, so the sampled index is
        # only meaningful when a single state is passed.
        action_cumsum = np.cumsum(action_prob)
        bit_rate = (action_cumsum > np.random.randint(
            1, RAND_RANGE) / float(RAND_RANGE)).argmax()
        return action_prob, bit_rate

    def loss(self, state, action_real_vec):
        """Evaluate the training loss for the given state(s) and target(s).

        Args:
            state: array-like reshapeable to ``(-1, S_INFO, S_LEN)``.
            action_real_vec: array-like reshapeable to ``(-1, A_DIM)``.

        Returns:
            The value of ``core_net_loss`` returned by the session.
        """
        loss_ = self.sess.run(self.core_net_loss, feed_dict={
            self.inputs: np.reshape(state, (-1, self.S_INFO, self.S_LEN)),
            self.y_: np.reshape(action_real_vec, (-1, self.A_DIM))
        })
        return loss_

    def submit(self, state, action_real_vec):
        """Add one (state, target action vector) sample to the pool."""
        self.pool_.submit(state, action_real_vec)

    def train(self):
        """Run one optimiser step on a batch drawn from the pool.

        Nothing happens when the pool returns an empty state batch.
        """
        training_s_batch, training_a_batch = self.pool_.get()
        if training_s_batch.shape[0] > 0:
            self.sess.run(self.core_train_op, feed_dict={
                self.inputs: np.array(training_s_batch),
                self.y_: np.array(training_a_batch)
            })

    def save(self, filename):
        """Save the network parameters to ``filename``."""
        self.saver.save(self.sess, filename)

    def load(self, filename):
        """Restore the network parameters from ``filename``."""
        self.saver.restore(self.sess, filename)

    def compute_entropy(self, x):
        """
        Given vector x, computes the entropy
        H(x) = - sum( p * log(p))

        Values are first clipped to [1e-5, 1], so a zero (or negative)
        probability contributes a small positive term instead of being
        ignored.  Entries that are not strictly inside (0, 1) after
        clipping (i.e. exactly 1, or NaN) are skipped.

        Args:
            x: array-like of probabilities; any shape is accepted and all
                entries are summed.

        Returns:
            The entropy as a float (0.0 for empty input).
        """
        p = np.clip(np.atleast_1d(x), 1e-5, 1.)
        # Same filter as the element-wise test 0 < p < 1; NaN compares False
        # on both sides and is therefore dropped.
        inside = (p > 0) & (p < 1)
        kept = p[inside]
        # Written as "0.0 - sum" so an all-skipped input yields 0.0, not -0.0.
        return 0.0 - np.sum(kept * np.log(kept))