-
Notifications
You must be signed in to change notification settings - Fork 0
/
profile.py
77 lines (58 loc) · 2.32 KB
/
profile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
import numpy as np
import json
import torch
import gym
import matplotlib.pyplot as plt
from DDQN import Agent
from train import encode_states
def predict_value(agent: Agent, state: np.ndarray) -> float:
    """Return the combined best-output estimate of both agent networks.

    ``state`` is converted to a float32 tensor on ``agent.network_one``'s
    device, passed through ``network_one`` and ``network_zero`` with gradient
    tracking disabled, and the largest output of each network is added
    together.

    Args:
        agent: Object exposing ``network_one`` and ``network_zero``; each must
            provide ``forward`` and ``network_one`` must provide ``device``.
        state: Encoded state fed to both networks.

    Returns:
        Sum of the two per-network maxima as a plain Python float.
    """
    with torch.no_grad():
        first_net = agent.network_one
        observation = torch.as_tensor(
            state,
            dtype=torch.float32,
            device=first_net.device,
        )
        # Largest output of each network, taken over the whole output tensor.
        best_of_one = first_net.forward(observation).max()
        best_of_zero = agent.network_zero.forward(observation).max()
        combined = best_of_one + best_of_zero
    return combined.item()
if __name__ == "__main__":
    # Profile a saved agent: plot the training curve, the test-score
    # distribution and a heat map of the agent's per-state value estimate,
    # then save the figure next to the data it was built from.

    # Directory that holds the saved models and the JSON logs.
    data_path = os.path.abspath('data')

    # Create the environment and an agent with training disabled, then load
    # the agent's saved models from ``data_path``.
    env = gym.make('FrozenLake8x8-v1')
    env.reset()
    agent = Agent(env=env, training=False)
    agent.load_models(data_path)

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(os.path.join(data_path, 'training_info.json'), encoding='utf-8') as f:
        train_data = json.load(f)
    with open(os.path.join(data_path, 'testing_info.json'), encoding='utf-8') as f:
        test_data = json.load(f)

    # Pull the series to plot out of the log records.
    # NOTE: "Epidosic" is misspelled, but the key must match the stored logs
    # (presumably written by the training script -- verify before renaming).
    score = [record["Epidosic Summed Rewards"] for record in train_data]
    average = [record["Moving Mean of Episodic Rewards"] for record in train_data]
    test = [record["Test Score"] for record in test_data]

    # Query the agent once per grid cell. ``np.ndindex`` walks the grid in
    # row-major order, so ``k`` is the flat state index of cell (row, col) and
    # rows/columns are bounded by their own axis lengths.
    state_value = np.zeros((8, 8))
    for k, (row, col) in enumerate(np.ndindex(state_value.shape)):
        state_value[row, col] = predict_value(agent, encode_states(env, k))

    # The environment is not needed past this point; release its resources.
    env.close()

    # Scale by the largest estimate. Skip when that maximum is zero, which
    # would otherwise fill the map with NaN/inf.
    peak = state_value.max()
    if peak != 0:
        state_value /= peak

    # Generate graphs
    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(16, 5))

    axes[0].plot(score, alpha=0.5, label='Episodic summation')
    axes[0].plot(average, label='Moving mean of 100 episodes')
    axes[0].grid(True)
    axes[0].set_xlabel('Training Episodes')
    axes[0].set_ylabel('Rewards')
    axes[0].legend(loc='best')
    axes[0].set_title('Training Profile')

    axes[1].boxplot(test)
    axes[1].grid(True)
    axes[1].set_xlabel('Test Run')
    axes[1].set_title('Testing Profile')

    axes[2].imshow(state_value)
    axes[2].set_xlabel('state')
    axes[2].set_title("Agent Value Estimation")
    # imshow draws array element [row, col] at data position x=col, y=row,
    # so the label for each cell is placed at (col, row).
    for (row, col), cell in np.ndenumerate(state_value):
        axes[2].text(col, row, np.around(cell, 2), c='white', weight='bold', ha='center', va='center')

    fig.tight_layout()
    fig.savefig(os.path.join(data_path, 'DDQN Agent Profiling.png'))