import numpy as np
import random
from collections import deque
import pickle
import pygame
import time
from datetime import datetime
import os
import matplotlib.pyplot as plt


class SnakeGame:
    def __init__(self, width=20, height=20):
        self.width = width
        self.height = height
        self.reset()

    def reset(self):
        self.snake = deque([(self.height//2, self.width//2)])
        self.direction = random.choice([(0, 1), (0, -1), (1, 0), (-1, 0)])
        self.place_food()
        self.score = 0
        self.steps = 0
        self.steps_without_food = 0
        return self._get_state()

    def place_food(self):
        while True:
            self.food = (random.randint(0, self.height-1),
                         random.randint(0, self.width-1))
            if self.food not in self.snake:
                break
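
    # step() applies a relative action (go straight, turn right, or turn left)
    # and returns (state, reward, done). The reward is shaped: a small bonus
    # for moving closer to the food (Manhattan distance), a small per-step
    # penalty, a bonus for eating that grows with the snake's length, and a
    # death penalty that also grows with length.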
    def step(self, action):
        self.steps += 1
        self.steps_without_food += 1
        # Actions: 0 (straight), 1 (right), 2 (left); a turn rotates the
        # (row, col) direction vector by 90 degrees
        if action == 1:
            self.direction = (-self.direction[1], self.direction[0])
        elif action == 2:
            self.direction = (self.direction[1], -self.direction[0])
        new_head = (self.snake[0][0] + self.direction[0],
                    self.snake[0][1] + self.direction[1])
        # Check if game is over
        game_over = (
            new_head[0] < 0 or new_head[0] >= self.height or
            new_head[1] < 0 or new_head[1] >= self.width or
            new_head in self.snake or
            self.steps_without_food > 100  # Prevent infinite loops
        )
        if game_over:
            reward = -1 - (len(self.snake) / 4)  # Bigger penalty for dying with longer snake
            return self._get_state(), reward, True
        # Calculate distance-based reward: compare the head's Manhattan
        # distance to the food before and after the move
        old_distance = abs(self.snake[0][0] - self.food[0]) + abs(self.snake[0][1] - self.food[1])
        new_distance = abs(new_head[0] - self.food[0]) + abs(new_head[1] - self.food[1])
        distance_reward = (old_distance - new_distance) * 0.1
        self.snake.appendleft(new_head)
        # Check if food is eaten
        if new_head == self.food:
            self.score += 1
            self.steps_without_food = 0
            reward = 1 + (len(self.snake) * 0.05)  # Bigger reward for eating with longer snake
            self.place_food()
        else:
            self.snake.pop()
            reward = distance_reward - 0.005  # Small penalty for each step
        return self._get_state(), reward, False

    def _is_danger(self, direction):
        head = self.snake[0]
        next_pos = (head[0] + direction[0], head[1] + direction[1])
        # Check if next position is out of bounds
        if (next_pos[0] < 0 or next_pos[0] >= self.height or
                next_pos[1] < 0 or next_pos[1] >= self.width):
            return True
        # Check if next position contains snake body
        if next_pos in self.snake:
            return True
        return False
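
    # The state is a 21-element vector:
    #   8 danger flags (is the adjacent cell in each compass direction a wall
    #     or a body segment),
    #   8 food-direction flags (is the food to the N, NE, E, SE, S, SW, W, NW
    #     of the head),
    #   4 one-hot flags for the current heading (N, E, S, W),
    #   1 normalized snake length.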
    def _get_state(self):
        head = self.snake[0]
        # Calculate relative positions of body parts
        # (note: body_positions is computed but not currently included in the
        # state vector below)
        body_positions = set()
        for i in range(1, min(4, len(self.snake))):  # Consider up to 3 body segments
            rel_pos = (self.snake[i][0] - head[0],
                       self.snake[i][1] - head[1])
            body_positions.add(rel_pos)
        state = [
            # Danger in 8 directions (N, NE, E, SE, S, SW, W, NW)
            *[self._is_danger((dx, dy)) for dx, dy in [
                (-1, 0), (-1, 1), (0, 1), (1, 1),
                (1, 0), (1, -1), (0, -1), (-1, -1)]],
            # Food direction (8 directions)
            self.food[0] < head[0],  # N
            self.food[0] < head[0] and self.food[1] > head[1],  # NE
            self.food[1] > head[1],  # E
            self.food[0] > head[0] and self.food[1] > head[1],  # SE
            self.food[0] > head[0],  # S
            self.food[0] > head[0] and self.food[1] < head[1],  # SW
            self.food[1] < head[1],  # W
            self.food[0] < head[0] and self.food[1] < head[1],  # NW
            # Current direction
            self.direction == (-1, 0),  # N
            self.direction == (0, 1),   # E
            self.direction == (1, 0),   # S
            self.direction == (0, -1),  # W
            # Length of snake (normalized)
            len(self.snake) / (self.width * self.height)
        ]
        return np.array(state, dtype=float)

    def render(self, screen):
        """Render the current game state to a pygame surface."""
        # Clear screen
        screen.fill((0, 0, 0))
        # Draw food
        pygame.draw.rect(screen, (255, 0, 0),
                         (self.food[1]*20, self.food[0]*20, 20, 20))
        # Draw snake
        for segment in self.snake:
            pygame.draw.rect(screen, (0, 255, 0),
                             (segment[1]*20, segment[0]*20, 20, 20))
        pygame.display.flip()
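

# Tabular Q-learning agent: the Q-table maps discretized state tuples to an
# array of per-action values, and updates are performed on random batches
# drawn from a bounded replay memory.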
class QLearningAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.q_table = {}
        self.epsilon = 1.0
        self.epsilon_min = 0.001593721112625894
        self.epsilon_decay = 0.9962050178281874
        self.learning_rate = 0.4525171577269225
        self.learning_rate_decay = 0.9996387403565626
        self.gamma = 0.9634379410341871
        self.memory = deque(maxlen=20000)
        self.batch_size = 188

    def get_state_key(self, state):
        # Discretize continuous values
        return tuple(np.round(state, 3))
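
    # Epsilon-greedy action selection: with probability epsilon pick a random
    # action (exploration), otherwise pick the action with the highest Q-value
    # for this state (exploitation). Unseen states get a zero-initialized row.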
    def act(self, state):
        if random.random() < self.epsilon:
            return random.randrange(self.action_size)
        state_key = self.get_state_key(state)
        if state_key not in self.q_table:
            self.q_table[state_key] = np.zeros(self.action_size)
        return np.argmax(self.q_table[state_key])

    def train(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
        if len(self.memory) >= self.batch_size:
            batch = random.sample(self.memory, self.batch_size)
            for state, action, reward, next_state, done in batch:
                self._update(state, action, reward, next_state, done)
        if done:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
            self.learning_rate *= self.learning_rate_decay
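
    # Standard tabular Q-learning update:
    #   Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (r + gamma * max_a' Q(s', a'))
    # with the bootstrap term dropped on terminal transitions (done == True).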
    def _update(self, state, action, reward, next_state, done):
        state_key = self.get_state_key(state)
        next_state_key = self.get_state_key(next_state)
        if state_key not in self.q_table:
            self.q_table[state_key] = np.zeros(self.action_size)
        if next_state_key not in self.q_table:
            self.q_table[next_state_key] = np.zeros(self.action_size)
        old_value = self.q_table[state_key][action]
        next_max = np.max(self.q_table[next_state_key])
        new_value = (1 - self.learning_rate) * old_value + \
            self.learning_rate * (reward + self.gamma * next_max * (1 - done))
        self.q_table[state_key][action] = new_value

    def save(self, filename):
        """Save the Q-table and agent parameters"""
        save_data = {
            'q_table': self.q_table,
            'epsilon': self.epsilon,
            'learning_rate': self.learning_rate
        }
        with open(filename, 'wb') as f:
            pickle.dump(save_data, f)

    def load(self, filename):
        """Load a saved Q-table and agent parameters"""
        with open(filename, 'rb') as f:
            save_data = pickle.load(f)
        self.q_table = save_data['q_table']
        self.epsilon = save_data['epsilon']
        self.learning_rate = save_data['learning_rate']


def visualize_agent(agent, env, speed=5, caption='Snake AI'):
    """Visualize the agent playing. speed is moves per second."""
    pygame.init()
    screen = pygame.display.set_mode((env.width * 20, env.height * 20))
    pygame.display.set_caption(caption)
    state = env.reset()
    done = False
    while not done:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                return
        action = agent.act(state)
        state, _, done = env.step(action)
        env.render(screen)
        time.sleep(1/speed)  # 0.2 seconds per move at the default speed
    pygame.quit()
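

# Training loop: run full episodes against SnakeGame, update the agent after
# every step, decay epsilon and the learning rate at the end of each episode,
# keep the best-scoring model, and write a checkpoint every 1000 episodes.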
def train_model(episodes=10000):
    env = SnakeGame()
    state_size = 21  # 8 danger + 8 food direction + 4 heading + 1 length
    action_size = 3
    agent = QLearningAgent(state_size, action_size)
    scores = []
    best_score = 0
    checkpoint_interval = 1000  # Save every 1000 episodes
    # Create directory for checkpoints
    checkpoint_dir = f"checkpoints_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    os.makedirs(checkpoint_dir, exist_ok=True)
    for episode in range(episodes):
        state = env.reset()
        total_reward = 0
        while True:
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            agent.train(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward
            if done:
                scores.append(env.score)
                if env.score > best_score:
                    best_score = env.score
                    # Save best model
                    agent.save(f"{checkpoint_dir}/best_model.pkl")
                if episode % 100 == 0:
                    avg_score = np.mean(scores[-100:])
                    print(f"Episode: {episode}, Average Score: {avg_score:.2f}, "
                          f"Best Score: {best_score}, Epsilon: {agent.epsilon:.2f}, "
                          f"Learning Rate: {agent.learning_rate:.3f}")
                # Save checkpoint
                if episode % checkpoint_interval == 0:
                    agent.save(f"{checkpoint_dir}/checkpoint_{episode}.pkl")
                break
    return agent, scores


if __name__ == "__main__":
    # Train the model
    trained_agent, training_scores = train_model()

    # Plot learning curve
    plt.figure(figsize=(10, 6))
    plt.plot(training_scores)
    plt.title('Learning Curve')
    plt.xlabel('Episode')
    plt.ylabel('Score')
    # Add moving average
    window_size = 100
    moving_avg = np.convolve(training_scores, np.ones(window_size)/window_size, mode='valid')
    plt.plot(range(window_size-1, len(training_scores)), moving_avg, 'r',
             label=f'{window_size}-episode moving average')
    plt.legend()
    plt.grid(True)
    plt.savefig('learning_curve.png')
    plt.show()

    # Save the final model
    trained_agent.save("final_model.pkl")

    # Find and visualize different checkpoints
    import glob
    checkpoint_dirs = glob.glob("checkpoints_*")
    if checkpoint_dirs:
        latest_dir = max(checkpoint_dirs)
        # Sort checkpoints numerically by episode number
        checkpoints = sorted(glob.glob(f"{latest_dir}/checkpoint_*.pkl"),
                             key=lambda p: int(p.split('_')[-1].split('.')[0]))
        for checkpoint in checkpoints:
            episode_num = checkpoint.split('_')[-1].split('.')[0]  # Extract episode number
            print(f"\nVisualizing Checkpoint from Episode {episode_num}")
            env = SnakeGame()
            agent = QLearningAgent(21, 3)
            agent.load(checkpoint)
            visualize_agent(agent, env,
                            caption=f'Snake AI - Checkpoint Episode {episode_num}')
        # Visualize the best model
        print("\nVisualizing Best Model")
        env = SnakeGame()
        agent = QLearningAgent(21, 3)
        agent.load(f"{latest_dir}/best_model.pkl")
        visualize_agent(agent, env, caption='Snake AI - Best Model')
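

# A minimal sketch of how a saved model could be replayed later without
# retraining (assumes a pickle such as "final_model.pkl" from a previous run
# exists in the working directory):
#
#   env = SnakeGame()
#   agent = QLearningAgent(21, 3)
#   agent.load("final_model.pkl")
#   agent.epsilon = 0.0  # disable exploration so the agent plays greedily
#   visualize_agent(agent, env, speed=5, caption='Snake AI - Replay')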