-
Notifications
You must be signed in to change notification settings - Fork 1
/
main_evaluation.py
150 lines (135 loc) · 4.92 KB
/
main_evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import os
import pickle
import json
from definitions import ROOT_DIR
from src.envs.environment_factory import EnvironmentFactory
from src.helpers.loading import get_env_and_config, get_trainer
from src.helpers.attention import compute_action_without_encoding
"""
Test a pretrained agent on a set of 100 body configurations per perturbation level. Independently
of the training sigma, the agent will be evaluated on the test sets for all the levels
(0.1, 0.3, 0.5, 0.7). This means that the tests will be both IID and OOD, depending on the training
sigma.
The output will be saved at "data/{env_name}/performance", unless the parameter "out_folder" is
modified.
The code looks for the saved checkpoint in the folder
"data/{env_name}/pretrained/{algorithm}_sigma_{sigma_literal}_seed_{seed}/checkpoint",
so make sure it is there. Furthermore, it expects the configuration file to be in the folder
"data/{env_name}/pretrained/{algorithm}_sigma_{sigma_literal}_seed_{seed}_data". Please see
the example provided for half_cheetah oracle sigma 0.1 seed 2 in "data/half_cheetah/pretrained.
"""
env_name = "half_cheetah" # walker, half_cheetah, hopper, ant
seed = 2 # 0, 1, 2, 3, 4
sigma = 0.1 # 0.1, 0.3, 0.5
algorithm = "oracle" # "simple", "oracle", "rma", "tcn", "dmap", "dmap-ne"
sigma_literal = str(sigma).replace(".", "")
out_name_specs = f"{algorithm}_seed_{seed}_sigma_{sigma_literal}"
out_folder = os.path.join(ROOT_DIR, "data", env_name, "performance")
xml_folder_path = os.path.join(ROOT_DIR, "data", "xmls", env_name)
folder_names_list = [name for name in os.listdir(xml_folder_path) if "test_" in name]
results_dict = {}
agent = "_".join(
(
env_name,
algorithm,
"sigma",
sigma_literal,
"seed",
str(seed),
)
)
if algorithm == "rma":
config_folder_path = os.path.join(
ROOT_DIR,
"data",
env_name,
"rma",
f"sigma_{sigma_literal}_seed_{seed}",
"step_10",
)
checkpoint_path = os.path.join(
config_folder_path,
"checkpoint_000000",
"checkpoint-0",
)
else:
if algorithm == "dmap-ne":
load_algo = "dmap"
else:
load_algo = algorithm
checkpoint_path = os.path.join(
ROOT_DIR,
"data",
env_name,
"pretrained",
f"{load_algo}_sigma_{sigma_literal}_seed_{seed}",
"checkpoint",
"checkpoint",
)
config_folder_path = "_".join((checkpoint_path.split("/checkpoint")[0], "data"))
config_file_name = [
filename for filename in os.listdir(config_folder_path) if ".json" in filename
][0]
config_file_path = os.path.join(config_folder_path, config_file_name)
# Build the evaluation environment and run configuration from the saved JSON.
env, config = get_env_and_config(config_file_path)
print(
    "Creating trainer for agent:",
    agent,
)
# Restore the trainer from the checkpoint resolved above.
trainer = get_trainer(config, checkpoint_path=checkpoint_path)
if algorithm == "dmap-ne":
    # "dmap-ne" evaluates the dmap policy without its learned encoding:
    # collect each per-action-dimension policy sub-network so actions can be
    # computed directly via compute_action_without_encoding in the loop below.
    policy = trainer.get_policy()
    # NOTE(review): assumes env wraps an inner env exposing action_dim — confirm.
    action_dim = env.env.action_dim
    embedding_dim = policy.model.action_model.embedding_size
    # Run the sub-networks on GPU only when the training config requested GPUs.
    device = "cpu" if config.num_gpus == 0 else "cuda"
    policy_net_list = []
    for i in range(action_dim):
        # One independent policy sub-network per action dimension, named
        # "_policy_fcnet_{i}" on the action model.
        policy_net = getattr(policy.model.action_model, f"_policy_fcnet_{i}")
        policy_net_list.append(policy_net)
# Per-agent results: one list of episode rewards per test folder.
results_dict[agent] = {
    "results": {},
}
# Evaluate one episode per saved perturbation configuration, per test folder.
for folder_name in folder_names_list:
    folder_path = os.path.join(ROOT_DIR, "data", "xmls", env_name, folder_name)
    results_dict[agent]["results"][folder_name] = []
    # Get the test perturbations from a saved list
    with open(os.path.join(folder_path, "perturbation_summary.pkl"), "rb") as file:
        perturbation_summary = pickle.load(file)
    # Sanity check: the saved test set perturbs the same quantities as the env.
    # NOTE(review): after the first folder, `env` is the last test env created
    # below, not the one from get_env_and_config — the perturbation_list is
    # presumably identical across them; confirm.
    assert perturbation_summary["perturbations"] == env.perturbation_list
    for perturbation_vals in perturbation_summary["values"]:
        print("Testing on perturbations", perturbation_vals)
        # Rebuild the env with the exact saved perturbation values.
        env = EnvironmentFactory.create(
            config.env_name,
            sigma=1,  # This way the perturbation values are not rescaled
            perturbation_vals=perturbation_vals,
            render=False,
        )
        obs = env.reset()
        done = False
        cum_reward = 0
        # Roll out one full episode, accumulating the undiscounted reward.
        while not done:
            if algorithm == "dmap-ne":
                # Bypass the learned encoding: query the per-dimension
                # policy sub-networks directly.
                action = compute_action_without_encoding(
                    policy_net_list, obs, action_dim, embedding_dim, device
                )
            else:
                # Deterministic (greedy) action from the restored trainer.
                action = trainer.compute_single_action(obs, explore=False)
            obs, reward, done, info = env.step(action)
            cum_reward += reward
        results_dict[agent]["results"][folder_name].append(cum_reward)
        print(
            "agent: ",
            agent,
            ", folder_path",
            folder_path,
            ", episode reward: ",
            cum_reward,
        )
# Persist the collected episode rewards as a JSON file under out_folder.
os.makedirs(out_folder, exist_ok=True)
results_path = os.path.join(out_folder, f"results_{out_name_specs}.json")
with open(results_path, "w") as out_file:
    json.dump(results_dict, out_file)
print("Results dict file created at ", results_path)