# step_update
# Takes each unit's location as input and outputs the step (move command) for that unit.
from pysc2.lib import actions, features, units
from n_marine_interface import num_agents
import numpy as np
import random
## Game setup
red_base = (7, 0)
blue_base = (0, 7)
mountains = [(2, 3), (3, 3), (2, 4), (3, 4), (4, 4), (4, 5)]
directions = ['up', 'down', 'left', 'right', 'nop']
goal_dist = 0
blue_done = 0
## Grid size
size = 8
EPS_VAL_START = 1.0
OPPO_RANDOMNESS_START = 0.3
OPPO_RANDOMNESS_END = 0.05
# Probability that the blue opponent acts randomly instead of greedily.
blue_randomness = min(OPPO_RANDOMNESS_START, EPS_VAL_START)
# Largest possible Manhattan distance between the two bases, used to normalise distances.
max_dist = abs(blue_base[0] - red_base[0]) + abs(blue_base[1] - red_base[1])
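# Illustration of the 8x8 grid, derived from the constants above (x grows to
# the right, y grows upward): B = blue_base, R = red_base, M = mountain,
# . = open ground.
#
#   y=7  B . . . . . . .
#   y=6  . . . . . . . .
#   y=5  . . . . M . . .
#   y=4  . . M M M . . .
#   y=3  . . M M . . . .
#   y=2  . . . . . . . .
#   y=1  . . . . . . . .
#   y=0  . . . . . . . R
#        x = 0 ..... 7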
# Getting the number of marines from the user.
def total_number_of_soldiers():
    num_of_soldiers = input("Enter number of marines: ")
    num_of_soldiers = int(num_of_soldiers)
    return num_of_soldiers

# num_agents=5
# Each blue marine is randomly assigned a preferred axis:
# 1 = close the y distance first, 0 = close the x distance first.
blue_pref_dir = [random.randint(0, 1) for _ in range(num_agents)]

# Returns True if moving one cell in the given direction keeps the unit on
# the grid and off the mountains.
def validMove(pos, direction):
    if direction == 'nop':
        return True
    elif direction == 'up':
        new_pos = (pos[0], pos[1] + 1)
    elif direction == 'down':
        new_pos = (pos[0], pos[1] - 1)
    elif direction == 'left':
        new_pos = (pos[0] - 1, pos[1])
    elif direction == 'right':
        new_pos = (pos[0] + 1, pos[1])
    else:
        new_pos = (pos[0], pos[1])
    if new_pos[0] < 0 or new_pos[1] < 0 or new_pos[0] > size - 1 or new_pos[1] > size - 1:
        return False
    if new_pos in mountains:
        return False
    return True
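
# A few sanity checks for validMove; a minimal sketch, guarded so that it
# only runs when this file is executed directly rather than imported.
if __name__ == "__main__":
    assert not validMove((0, 0), 'left')   # would step off the west edge
    assert not validMove((2, 2), 'up')     # (2, 3) is a mountain cell
    assert validMove((5, 5), 'right')      # (6, 5) is open ground
    assert validMove((4, 4), 'nop')        # 'nop' is always legal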

# Converts pysc2 screen coordinates to grid coordinates.
def calculate_coords(x, y):
    x_co = (x - 30) // 10
    y_co = abs((y - 28) // 10 - 7)
    return x_co, y_co
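
# Hedged sketch (not part of the original interface): the inverse mapping from
# a grid cell back to approximate screen coordinates, assuming the same
# 10-pixel cell size and (30, 28) origin that calculate_coords uses. pysc2's
# screen y axis points downward, hence the flip of the grid y coordinate.
def grid_to_screen(x_co, y_co):
    x = 30 + 10 * x_co
    y = 28 + 10 * (7 - y_co)
    return x, y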

## Method for the Blue agent
def moveB(num_step, unit_tags, marine, xcor, ycor):
    ## The opponent Blue agent moves with a stochastic greedy policy based on
    ## its current Manhattan distance from the red base.
    blue_poses = []
    # Calculating the grid coordinates of the blue marines.
    for marine_obj in marine:
        x_co, y_co = calculate_coords(marine_obj.x, marine_obj.y)
        blue_poses.append((x_co, y_co))
    # Selecting a move command for each blue marine: with probability
    # max(blue_randomness, OPPO_RANDOMNESS_END) pick a random direction,
    # otherwise move greedily toward the red base along the preferred axis.
    agent_step = []
    for idx in range(num_agents):
        if random.random() <= max(blue_randomness, OPPO_RANDOMNESS_END):
            ## Random exploration makes the opponent easier to beat.
            direction = random.choice(directions)
            if validMove(blue_poses[idx], direction):
                agent_step.append(direction)
            else:
                agent_step.append("nop")
        else:
            if blue_pref_dir[idx]:
                # Close the y distance first, then the x distance.
                y_dis = blue_poses[idx][1] - red_base[1]
                if y_dis > 0:
                    agent_step.append('down')
                elif y_dis < 0:
                    agent_step.append('up')
                else:
                    x_dis = blue_poses[idx][0] - red_base[0]
                    if x_dis > 0:
                        agent_step.append('left')
                    elif x_dis < 0:
                        agent_step.append('right')
                    else:
                        agent_step.append('nop')  # already at the red base
            else:
                # Close the x distance first, then the y distance.
                x_dis = blue_poses[idx][0] - red_base[0]
                if x_dis > 0:
                    agent_step.append('left')
                elif x_dis < 0:
                    agent_step.append('right')
                else:
                    y_dis = blue_poses[idx][1] - red_base[1]
                    if y_dis > 0:
                        agent_step.append('down')
                    elif y_dis < 0:
                        agent_step.append('up')
                    else:
                        agent_step.append('nop')  # already at the red base
            if not validMove(blue_poses[idx], agent_step[idx]):
                agent_step[idx] = "nop"
        print("At step:", num_step, " BLUE agent location:", blue_poses[idx], "took action:", agent_step[idx])
    unit_actions = []
    blue_pos = []
    done = False
    global blue_done
    # Changing the screen coordinates of the blue marines according to their
    # directions; screen y grows downward, so 'up' subtracts 10 pixels.
    for i in range(num_agents):
        ycor.append(marine[i].y)
        xcor.append(marine[i].x)
        if agent_step[i] == "up":
            ycor[i] = int(ycor[i]) - 10
        if agent_step[i] == "down":
            ycor[i] = int(ycor[i]) + 10
        if agent_step[i] == "left":
            xcor[i] = int(xcor[i]) - 10
        if agent_step[i] == "right":
            xcor[i] = int(xcor[i]) + 10
        blue_pos.append((xcor[i], ycor[i]))
        # Checking whether this blue marine has reached the red base, in which
        # case the blue team wins.
        x_co, y_co = calculate_coords(xcor[i], ycor[i])
        if (x_co, y_co) == red_base:
            done = True
            blue_done = 1
        unit_actions.append(actions.RAW_FUNCTIONS.Move_pt("now", unit_tags[i], (xcor[i], ycor[i])))
        result = {"unit tag": [unit_tags[i]], "location": [xcor[i], ycor[i]]}
        print("B:", result)
    reward = 0
    # If blue wins, the reward is penalised in proportion to how far the red
    # marines still were from the blue base (goal_dist is set in moveR).
    if done:
        reward = -250 * goal_dist
    return unit_actions, blue_pos, done, reward
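    # Worked example of the penalty above, with illustrative numbers: three
    # red marines at normalised distances 0.5, 0.7 and 1.0 from the blue base
    # give goal_dist = 2.2, so the returned reward is -250 * 2.2 = -550.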

## Method for the Red agent
def moveR(num_step, unit_tags, marine, xcor, ycor, blue_pos, player_net):
    ## Move the Red agents by consulting the learned policy (player_net).
    unit_actions = []
    agent_step = []
    oppo_x_values = []
    oppo_y_values = []
    reward = [0] * num_agents
    reward_sum = 0
    obs = [[] for _ in range(num_agents)]
    # Logging the grid locations of the red marines.
    for i in range(num_agents):
        x_co, y_co = calculate_coords(marine[i].x, marine[i].y)
        print("At step:", num_step, " RED agent location:", (x_co, y_co))
    # Getting the grid locations of the blue marines.
    for i, (oppo_x, oppo_y) in enumerate(blue_pos):
        oppo_x, oppo_y = calculate_coords(oppo_x, oppo_y)
        oppo_x_values.append(oppo_x)
        oppo_y_values.append(oppo_y)
    # Picking an action for each red marine from player_net, given its own
    # grid location followed by the locations of all blue marines.
    for i in range(num_agents):
        x_co, y_co = calculate_coords(marine[i].x, marine[i].y)
        agent_obs = [x_co, y_co]
        for j in range(num_agents):
            agent_obs.append(oppo_x_values[j])
            agent_obs.append(oppo_y_values[j])
        obs[i] = np.array(agent_obs, dtype=np.float32)
        agent_step.append([directions[player_net.act(obs[i])]])
        print(agent_step[i])
        # An invalid move is replaced by 'nop' and penalised heavily; a valid
        # move costs a small per-step penalty.
        if not validMove((x_co, y_co), agent_step[i][0]):
            agent_step[i] = ["nop"]
            reward[i] = -200
        else:
            reward[i] = -1
        print("At step:", num_step, " agent location:", (x_co, y_co), "took action:", agent_step[i])
    done = False
    global goal_dist
    total_dist = 0
    # Moving the red marines according to the actions chosen above; screen y
    # grows downward, so 'up' subtracts 10 pixels.
    for i in range(num_agents):
        ycor.append(marine[i].y)
        xcor.append(marine[i].x)
        if agent_step[i][0] == "up":
            ycor[i] = int(ycor[i]) - 10
        if agent_step[i][0] == "down":
            ycor[i] = int(ycor[i]) + 10
        if agent_step[i][0] == "left":
            xcor[i] = int(xcor[i]) - 10
        if agent_step[i][0] == "right":
            xcor[i] = int(xcor[i]) + 10
        agent_pos = (xcor[i], ycor[i])
        x_co, y_co = calculate_coords(agent_pos[0], agent_pos[1])
        red_pos = (x_co, y_co)
        # Checking whether the red marine has reached the blue base, in which
        # case red wins and the winning reward is assigned below.
        if (x_co, y_co) == blue_base:
            done = True
        unit_actions.append(actions.RAW_FUNCTIONS.Move_pt("now", unit_tags[i], (xcor[i], ycor[i])))
        result = {"unit tag": [unit_tags[i]], "location": [xcor[i], ycor[i]]}
        print("R:", result)
        # Accumulating this marine's normalised distance to the blue base; it
        # feeds the blue team's reward in case the blue team wins.
        dist = (abs(blue_base[0] - red_pos[0]) + abs(blue_base[1] - red_pos[1])) / max_dist
        total_dist += dist
        if done:
            reward[i] = 500
    goal_dist = total_dist
    # Summing up the rewards of all marines in the red team.
    for i in range(num_agents):
        reward_sum += reward[i]
    return unit_actions, done, reward_sum
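
# Hedged usage sketch (not part of the original module): how moveB and moveR
# might be wired into a pysc2 agent's step() method. It assumes the standard
# pysc2 raw interface (obs.observation.raw_units with alliance codes
# 1 = self, 4 = enemy) and that player_net exposes act(observation).
def example_step(obs, num_step, player_net):
    marines = [u for u in obs.observation.raw_units
               if u.unit_type == units.Terran.Marine]
    red = [u for u in marines if u.alliance == 1]
    blue = [u for u in marines if u.alliance == 4]
    # Fresh coordinate lists each step; moveB and moveR append into them.
    blue_actions, blue_pos, blue_won, blue_reward = moveB(
        num_step, [u.tag for u in blue], blue, [], [])
    red_actions, red_won, red_reward = moveR(
        num_step, [u.tag for u in red], red, [], [], blue_pos, player_net)
    return blue_actions + red_actions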