utils.py
# Bunch of misc utils that have nowhere else to go, poor fellahs...
import os
import json
from glob import glob

import numpy as np


def load_and_interleave_monitor_csvs(paths_to_csvs):
"""
Load bunch of stable-baselines Monitor .csv files,
interleave them by timestamps and return a Dictionary
with keys "time", "rewards" and "length", each
mapping to a ndarray of corresponding values at different
episodes.
Using VecEnvs with Monitors creates multiple Monitor files
for one training run, so we need this interleaving to get
one "learning curve".
Arguments:
paths_to_csvs (List of str): Paths to .csv files to load
Returns:
loaded_values (Dict): Dict with keys "time", "rewards" and
"length"
"""
csv_datas = []
smallest_timestamp = np.inf
for path_to_csv in paths_to_csvs:
# Load data as np array
csv_data = None
try:
csv_data = np.genfromtxt(
path_to_csv,
names=True,
skip_header=1,
delimiter=","
)
except Exception as e:
print("Could not load file %s" % path_to_csv)
print(e)
# Load the header as JSON, so we get the starting time.
# Bit of waste to read the whole file, but oh well...
# Also remove first character "#"
header_json = open(path_to_csv).readlines()[0][1:]
start_timestamp = json.loads(header_json)["t_start"]
# Add this timestamp to timestamps in monitor
csv_data["t"] += start_timestamp
# Aaand track the smallest start_timestamp, which
# we will use as time zero
if start_timestamp < smallest_timestamp:
smallest_timestamp = start_timestamp
csv_datas.append(csv_data)
# Interleave by concatenating everything and
# then sorting. Super effecient :D
all_data = []
for csv_data in csv_datas:
all_data.extend(
zip(*[csv_data[key].tolist() for key in ["r", "l", "t"]])
)
# Sort by "t" (time)
all_data = sorted(all_data, key=lambda x: x[2])
# Unzip, convert back to ndarrays and return as dict
rewards, lengths, timesteps = zip(*all_data)
return_dict = {
"rewards": np.array(rewards),
"lengths": np.array(lengths),
"timesteps": np.array(timesteps),
}
# Subtract the smallest timestep to "start from zero",
# like monitor files do
return_dict["timesteps"] -= smallest_timestamp
return return_dict
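
# Example usage (a minimal sketch; the directory "logs/run1" and the
# "*.monitor.csv" naming are assumptions for illustration, not part of
# this module):
#
#   monitor_csvs = glob(os.path.join("logs", "run1", "*.monitor.csv"))
#   curve = load_and_interleave_monitor_csvs(monitor_csvs)
#   # curve["rewards"], curve["lengths"] and curve["timesteps"] are ndarrays
#   # with one entry per episode, sorted by wall-clock time since the
#   # earliest Monitor start time.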


def load_experiment_monitor(experiment_path):
    """
    Load monitor data from one experiment directory.

    Arguments:
        experiment_path (str): Path to the experiment directory
    Returns:
        loaded_values (Dict): Dict with keys "rewards", "lengths"
                              and "timesteps"
    """
    csvs = glob(os.path.join(experiment_path, "*.csv"))
    return load_and_interleave_monitor_csvs(csvs)
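
# Example usage (sketch; "logs/run1" is a hypothetical experiment directory
# containing stable-baselines Monitor .csv files):
#
#   curve = load_experiment_monitor("logs/run1")
#   total_env_steps = curve["lengths"].sum()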


def load_experiment(experiment_path):
    """
    Load a single learning-curve result, either from a directory with a
    bunch of csv files (stable-baselines Monitor outputs), or from a
    tsv-like file with each line being "[num_steps] [average_reward]".

    Arguments:
        experiment_path (str): Path to the experiment to be loaded.
                               If a directory, assume it contains a
                               bunch of stable-baselines monitor files.
                               If a file, assume it is a tsv file with
                               structure "[num_steps] [average_reward]"
    Returns:
        loaded_values (Dict): Dict with keys "steps" and "rewards",
                              "steps" being an array of the number of
                              steps trained, and "rewards" the
                              corresponding average episodic rewards
    """
    if os.path.isdir(experiment_path):
        # Bunch of stable-baselines Monitor CSVs
        data = load_experiment_monitor(experiment_path)
        # Turn episode lengths into numbers of steps
        steps = np.cumsum(data["lengths"])
        return_dict = {
            "steps": steps,
            "rewards": data["rewards"]
        }
        return return_dict
    else:
        # Assume a tsv file
        data = np.loadtxt(experiment_path)
        # Bit of preprocessing: in some cases the same step value
        # appears multiple times, in which case take the average
        # of its rewards.
        steps = []
        rewards = []
        raw_steps = data[:, 0]
        raw_rewards = data[:, 1]
        unique_steps = np.sort(np.unique(raw_steps))
        for unique_step in unique_steps:
            steps.append(unique_step)
            rewards.append(raw_rewards[raw_steps == unique_step].mean())
        return_dict = {
            "steps": np.array(steps),
            "rewards": np.array(rewards)
        }
        return return_dict
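
# Example usage (sketch; both paths are assumptions for illustration):
#
#   # A directory of stable-baselines Monitor csv files
#   curve = load_experiment("logs/run1")
#   # ...or a tsv file with "[num_steps] [average_reward]" lines
#   curve = load_experiment("results/run1.txt")
#   # In both cases curve["steps"] and curve["rewards"] are aligned ndarrays
#   # suitable for plotting a learning curve.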


def find_solved_point(experiment_data, window_size=20, solved_threshold=1):
    """
    Find the number of steps at which the agent has "solved" the
    environment, i.e. reached a reward >= solved_threshold for
    window_size successive episodes.

    Arguments:
        experiment_data: Loaded experiment data (see `load_experiment`)
        window_size (int): How many successive episodes have to have a
                           reward equal to or above solved_threshold
                           before the environment is considered solved.
        solved_threshold (float): Reward threshold for considering the
                                  environment "solved"
    Returns:
        agent_steps: Number of steps it took to solve the environment,
                     or None if the environment was not solved
    """
    rewards_above_threshold = experiment_data["rewards"] >= solved_threshold
    successive_solves = np.convolve(
        np.ones((window_size,)),
        rewards_above_threshold,
        "same"
    )
    solve_indexes = np.where(successive_solves >= window_size)[0]
    if len(solve_indexes) == 0:
        # Was not solved at any point
        return None
    else:
        # Return the number of steps at which the environment was solved
        solved_idx = solve_indexes[0]
        return experiment_data["steps"][solved_idx]
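
# Example usage (a minimal sketch; "logs/run1" is a hypothetical directory of
# stable-baselines Monitor files, and the window/threshold values are just
# the defaults shown explicitly):
#
#   experiment_data = load_experiment("logs/run1")
#   solved_at = find_solved_point(
#       experiment_data, window_size=20, solved_threshold=1
#   )
#   if solved_at is None:
#       print("Environment was not solved")
#   else:
#       print("Solved after %d agent steps" % solved_at)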