# gym_eval.py (executable script; forked from dgriff777/rl_a3c_pytorch)
from __future__ import division
import os
os.environ["OMP_NUM_THREADS"] = "1"
import argparse
import torch
from environment import atari_env
from utils import read_config, setup_logger
from model import A3Clstm
from player_util import Agent
import gym
import logging
import time
#from gym.configuration import undo_logger_setup
#undo_logger_setup()
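# Example usage (assumes a checkpoint saved as trained_models/Pong-v0.dat by
# this repo's training script; the path is built from --load-model-dir and
# --env below):
#   python gym_eval.py --env Pong-v0 --num-episodes 100 --render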
parser = argparse.ArgumentParser(description='A3C_EVAL')
parser.add_argument(
    '--env',
    default='Pong-v0',
    metavar='ENV',
    help='environment to evaluate on (default: Pong-v0)')
parser.add_argument(
    '--env-config',
    default='config.json',
    metavar='EC',
    help='per-environment crop and resize settings (default: config.json)')
parser.add_argument(
    '--num-episodes',
    type=int,
    default=100,
    metavar='NE',
    help='number of evaluation episodes (default: 100)')
parser.add_argument(
    '--load-model-dir',
    default='trained_models/',
    metavar='LMD',
    help='folder to load trained models from')
parser.add_argument(
    '--log-dir', default='logs/', metavar='LG', help='folder to save logs')
# Boolean flags use store_true so that e.g. "--render False" cannot silently
# parse as a truthy string.
parser.add_argument(
    '--render',
    action='store_true',
    help='watch the game as it is being played')
parser.add_argument(
    '--render-freq',
    type=int,
    default=1,
    metavar='RF',
    help='render every Nth episode (default: 1)')
parser.add_argument(
    '--max-episode-length',
    type=int,
    default=10000,
    metavar='M',
    help='maximum length of an episode (default: 10000)')
parser.add_argument(
    '--gpu-id',
    type=int,
    default=-1,
    help='GPU to use [-1 CPU only] (default: -1)')
parser.add_argument(
    '--skip-rate',
    type=int,
    default=4,
    metavar='SR',
    help='frame skip rate (default: 4)')
parser.add_argument(
    '--seed',
    type=int,
    default=1,
    metavar='S',
    help='random seed (default: 1)')
parser.add_argument(
    '--new-gym-eval',
    action='store_true',
    help='record a gym evaluation for upload')
args = parser.parse_args()
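# Pick the crop/resize settings for this environment from the config file,
# falling back to the "Default" entry when no key matches the env name.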
setup_json = read_config(args.env_config)
env_conf = setup_json["Default"]
for i in setup_json.keys():
    if i in args.env:
        env_conf = setup_json[i]
gpu_id = args.gpu_id
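# Seed the RNGs so evaluation runs are reproducible; the CUDA RNG only needs
# seeding when a GPU is selected.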
torch.manual_seed(args.seed)
if gpu_id >= 0:
    torch.cuda.manual_seed(args.seed)
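# Load the trained weights onto the CPU (map_location keeps tensors on CPU);
# they are moved to the GPU below only if --gpu-id selects one.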
saved_state = torch.load(
    '{0}{1}.dat'.format(args.load_model_dir, args.env),
    map_location=lambda storage, loc: storage)
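# Set up a dedicated monitor logger for this run and record every
# command-line argument, so the log file documents the exact configuration.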
log = {}
setup_logger('{}_mon_log'.format(args.env), r'{0}{1}_mon_log'.format(
    args.log_dir, args.env))
log['{}_mon_log'.format(args.env)] = logging.getLogger('{}_mon_log'.format(
    args.env))
d_args = vars(args)
for k in d_args.keys():
    log['{}_mon_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))
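# Build the preprocessed Atari environment and an evaluation-only agent; the
# None placeholders are the model and initial state, which are filled in just
# below and at each episode reset.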
env = atari_env("{}".format(args.env), env_conf, args)
num_tests = 0
start_time = time.time()
reward_total_sum = 0
player = Agent(None, env, args, None)
player.model = A3Clstm(player.env.observation_space.shape[0],
                       player.env.action_space)
player.gpu_id = gpu_id
if gpu_id >= 0:
    with torch.cuda.device(gpu_id):
        player.model = player.model.cuda()
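# Optionally record the run with gym's Monitor wrapper for upload. Note that
# gym.wrappers.Monitor only exists in older gym releases; newer gym/gymnasium
# versions replaced it with RecordVideo/RecordEpisodeStatistics.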
if args.new_gym_eval:
    player.env = gym.wrappers.Monitor(
        player.env, "{}_monitor".format(args.env), force=True)
if gpu_id >= 0:
    with torch.cuda.device(gpu_id):
        player.model.load_state_dict(saved_state)
else:
    player.model.load_state_dict(saved_state)
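# Switch the network to inference mode before rolling out episodes.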
player.model.eval()
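# Evaluation loop: an episode is only counted when player.info signals a real
# game over; a done flag without info (e.g. a lost life) just resets the
# observation and the same logged episode continues.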
for i_episode in range(args.num_episodes):
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.state = player.state.cuda()
    player.eps_len += 2
    reward_sum = 0
    while True:
        if args.render:
            if i_episode % args.render_freq == 0:
                player.env.render()
        player.action_test()
        reward_sum += player.reward
        if player.done and not player.info:
            # Done without the real game-over signal: reset the observation
            # and keep playing the same episode.
            state = player.env.reset()
            player.eps_len += 2
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
        elif player.info:
            # Real game over: update the running statistics and log them.
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_mon_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}".
                format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))
            player.eps_len = 0
            break