forked from DLR-RM/rl-baselines3-zoo
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrender_optimization_policies.py
59 lines (49 loc) · 1.56 KB
/
render_optimization_policies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
from os.path import exists
import numpy as np
import pettingzoo.butterfly.pistonball_v5 as pistonball_v5
import supersuit as ss
from array2gif import write_gif
from stable_baselines3 import PPO
# Render one GIF per saved policy found under ./optimization_policies/.
#
# For every sub-directory that contains a ``best_model.zip`` checkpoint, the
# PPO model is loaded, one episode is played in the Pistonball environment,
# periodically rendered frames are collected, and the result is written to
# ./optimization_gifs/<policy>_<mean reward>.gif.

# Divisor used to turn the summed per-agent reward into a per-agent average.
# NOTE(review): presumably matches the environment's agent count -- confirm
# if pistonball is ever constructed with non-default arguments.
n_agents = 20

# Observation preprocessing: keep the "B" colour channel, resize to 84x84,
# stack 3 frames.
# NOTE(review): presumably has to mirror the wrappers used at training time.
env = pistonball_v5.env()
env = ss.color_reduction_v0(env, mode="B")
env = ss.resize_v0(env, x_size=84, y_size=84)
env = ss.frame_stack_v1(env, 3)

policies = os.listdir("./optimization_policies/")

# write_gif cannot save into a missing directory; create it up front so a
# fresh checkout does not fail for every single policy.
os.makedirs("./optimization_gifs/", exist_ok=True)

for policy in policies:
    filepath = "./optimization_policies/" + policy + "/best_model"
    if not exists(filepath + ".zip"):
        # Entry has no saved checkpoint -- nothing to render.
        continue
    print("Loading new policy ", filepath)
    model = PPO.load(filepath)

    obs_list = []
    i = 0
    env.reset()
    total_reward = 0

    try:
        # Play a single episode; agent_iter() ends once every agent is done.
        for _agent in env.agent_iter():
            observation, reward, done, _ = env.last()
            # An agent that is already done is stepped with None.
            action = (
                model.predict(observation, deterministic=True)[0]
                if not done
                else None
            )
            total_reward += reward

            env.step(action)
            i += 1
            # Capture a frame every (number of agents + 1) environment steps
            # rather than on every single agent step.
            if i % (len(env.possible_agents) + 1) == 0:
                obs_list.append(
                    np.transpose(env.render(mode="rgb_array"), axes=(1, 0, 2))
                )

        total_reward = total_reward / n_agents
        print("writing gif")
        write_gif(
            obs_list,
            "./optimization_gifs/" + policy + "_" + str(total_reward)[:5] + ".gif",
            fps=15,
        )
    except Exception as exc:
        # Best effort: a failing policy must not stop the remaining ones, but
        # report which policy failed and why. Catching Exception (not a bare
        # except) lets Ctrl-C / SystemExit still terminate the script.
        print("error", policy, repr(exc))