forked from MorvanZhou/train-robot-arm-from-scratch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
69 lines (53 loc) · 1.36 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""
Make it more robust.
Stop episode once the finger stop at the final position for 50 steps.
Feature & reward engineering.
"""
from part5.env import ArmEnv
from part5.rl import DDPG
MAX_EPISODES = 500
MAX_EP_STEPS = 200
ON_TRAIN = True
# set env
env = ArmEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound
# set RL method (continuous)
rl = DDPG(a_dim, s_dim, a_bound)
steps = []
def train():
# start training
for i in range(MAX_EPISODES):
s = env.reset()
ep_r = 0.
for j in range(MAX_EP_STEPS):
env.render()
a = rl.choose_action(s)
s_, r, done = env.step(a)
rl.store_transition(s, a, r, s_)
ep_r += r
if rl.memory_full:
# start to learn once has fulfilled the memory
rl.learn()
s = s_
if done or j == MAX_EP_STEPS-1:
print('Ep: %i | %s | ep_r: %.1f | step: %i' % (i, '---' if not done else 'done', ep_r, j))
break
rl.save()
def eval():
rl.restore()
env.render()
env.viewer.set_vsync(True)
while True:
s = env.reset()
for _ in range(200):
env.render()
a = rl.choose_action(s)
s, r, done = env.step(a)
if done:
break
if ON_TRAIN:
train()
else:
eval()