-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.py
106 lines (79 loc) · 2.91 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from typing import Dict, List
import os
import json
import numpy as np
import gym
from torch.utils.tensorboard import SummaryWriter
from DDQN import Agent
# Init. tensorboard summary writer.
# NOTE: created at import time, so the log directory data/tensorboard is
# opened (and created by the writer) as soon as this module loads.
tb = SummaryWriter(log_dir=os.path.abspath('data/tensorboard'))
def encode_states(env: gym.Env, state: int) -> np.ndarray:
    """One-hot encode an integer state of a discrete observation space.

    Args:
        env (gym.Env): environment whose discrete observation space sets the
            encoding dimension (``env.observation_space.n``).
        state (int): index of the current state, in ``[0, n)``.

    Returns:
        np.ndarray: vector of length ``env.observation_space.n`` with a
        single ``1.0`` at position ``state`` and ``0.0`` elsewhere.
    """
    encoded_state = np.zeros(env.observation_space.n)
    encoded_state[state] = 1.0
    # A one-hot vector already sums to 1.0, so the original's
    # `encoded_state / encoded_state.sum()` was a no-op; return directly.
    return encoded_state
if __name__ == '__main__':
    # Init. Environment (old gym API: reset() -> int state, step() -> 4-tuple).
    env = gym.make("FrozenLake8x8-v1")
    env.reset()

    # Directory where models and the training log are stored.
    data_path = os.path.abspath('data')

    # Training configuration.
    n_games: int = 2000
    best_score = -np.inf
    # BUG FIX: the original used `[] * n_games`, which is always the empty
    # list (multiplying an empty list is a no-op), so the intended
    # "pre-allocation" did nothing. Start empty and append per episode.
    score_history: List[float] = []
    avg_history: List[float] = []
    logging_info: List[Dict[str, float]] = []

    # Init. Agent
    agent = Agent(env=env, n_games=n_games)

    for i in range(n_games):
        score: float = 0.0
        done: bool = False

        # Initial reset; the int state is one-hot encoded for the network.
        state = env.reset()
        state = encode_states(env, state)

        # Roll out one episode, storing transitions and optimizing each step.
        while not done:
            action = agent.choose_action(state)
            next_state, reward, done, _ = env.step(action)
            next_state = encode_states(env, next_state)
            agent.memory.add(state, action, reward, next_state, done)
            state = next_state
            score += reward
            agent.optimize()

        # NOTE(review): the original sets `agent.n_games = 1` after every
        # episode; this looks like it was meant to be `agent.n_games -= 1`
        # (a remaining-episodes counter) — confirm against DDQN.Agent before
        # changing. Behavior preserved here.
        agent.n_games = 1

        score_history.append(score)
        # 100-episode moving mean of the summed rewards.
        avg_score: float = np.mean(score_history[-100:])
        avg_history.append(avg_score)

        # Persist the networks whenever the moving average improves.
        if avg_score > best_score:
            best_score = avg_score
            agent.save_models(data_path)
            print(f'Episode:{i}'
                  f'\t ACC. Rewards: {score:3.2f}'
                  f'\t AVG. Rewards: {avg_score:3.2f}'
                  f'\t *** MODEL SAVED! ***')
        else:
            print(f'Episode:{i}'
                  f'\t ACC. Rewards: {score:3.2f}'
                  f'\t AVG. Rewards: {avg_score:3.2f}')

        # TYPO FIX: 'Epidosic' -> 'Episodic' in logged keys/labels.
        episode_info = {
            'Episode': i,
            'Total Episodes': n_games,
            'Episodic Summed Rewards': score,
            'Moving Mean of Episodic Rewards': avg_score
        }
        logging_info.append(episode_info)

        # Add info. to tensorboard
        tb.add_scalars('training_rewards',
                       {'Episodic Summed Rewards': score,
                        'Moving Mean of Episodic Rewards': avg_score}, i)

        # Rewrite the full training log each episode so progress survives an
        # interrupted run.
        with open(os.path.join(data_path, 'training_info.json'), 'w', encoding='utf8') as file:
            json.dump(logging_info, file, indent=4, ensure_ascii=False)

    # Close tensorboard writer
    tb.close()