-
Notifications
You must be signed in to change notification settings - Fork 0
/
environment.py
88 lines (65 loc) · 2.18 KB
/
environment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import numpy as np
class StationaryEnvironment:
    """
    Bandit environment whose true action values stay fixed between resets.

    On every reset each action's true value is drawn once from a normal
    distribution N(init_mean, init_std). Each call to ``act`` returns the
    chosen action's true value plus a fresh noise sample drawn from
    N(noise_mean, noise_std).
    """

    def __init__(self, num_actions, init_mean, init_std, noise_mean, noise_std):
        """
        Initialize an environment for bandits.
        :param num_actions: Number of actions.
        :param init_mean: Mean of the distribution the true action values are drawn from.
        :param init_std: Standard deviation of the distribution the true action values are drawn from.
        :param noise_mean: Reward noise mean.
        :param noise_std: Reward noise standard deviation.
        """
        self.num_actions = num_actions
        self.init_mean = init_mean
        self.init_std = init_std
        self.noise_mean = noise_mean
        self.noise_std = noise_std

        # Populated by reset(); declared here so the attribute always exists.
        self.action_values = None
        self.reset()

    def reset(self):
        """
        Reset the environment by re-sampling the true value of every action.
        :return: None.
        """
        # The spread of the true action values is governed by init_std, not by
        # the per-reward noise level (noise_std), which is only applied in act().
        self.action_values = np.random.normal(self.init_mean, self.init_std, size=self.num_actions)

    def act(self, action):
        """
        Take an action in the environment.
        :param action: An action (index from 0 to num_actions - 1).
        :return: Reward for the action plus noise.
        """
        assert 0 <= action < self.num_actions

        # Indexing with a scalar yields a copy of the element, so adding noise
        # to it never modifies the stored action values.
        value = self.action_values[action]
        value += np.random.normal(self.noise_mean, self.noise_std)

        return value
class NonStationaryEnvironment:
    """
    Bandit environment whose action values drift through a random walk.

    All action values start at ``init_value``; each call to ``step`` adds an
    independent N(0, walk_std) increment to every action value.
    """

    def __init__(self, num_actions, init_value, walk_std, noise_mean, noise_std):
        """
        Initialize a non-stationary environment for bandits.
        :param num_actions: Number of actions.
        :param init_value: Value every action starts from after a reset.
        :param walk_std: Standard deviation of a single random-walk increment.
        :param noise_mean: Reward noise mean.
        :param noise_std: Reward noise standard deviation.
        """
        self.num_actions = num_actions
        self.init_value = init_value
        self.walk_std = walk_std
        self.noise_mean = noise_mean
        self.noise_std = noise_std

        # Filled in by reset() right below.
        self.action_values = None
        self.reset()

    def reset(self):
        """
        Put every action value back to the initial value.
        :return: None.
        """
        baseline = np.zeros(self.num_actions)
        self.action_values = baseline + self.init_value

    def act(self, action):
        """
        Perform an action and observe its noisy reward.
        :param action: An action (index from 0 to num_actions - 1).
        :return: Current value of the action plus a noise sample.
        """
        assert 0 <= action < self.num_actions

        true_value = self.action_values[action]
        noise = np.random.normal(self.noise_mean, self.noise_std)
        return true_value + noise

    def step(self):
        """
        Advance the random walk of all action values by one increment.
        :return: None.
        """
        drift = np.random.normal(0, self.walk_std, size=self.num_actions)
        # In-place update keeps the same array object alive across steps.
        self.action_values += drift