# my_custom_player.py
from sample_players import DataPlayer
import random, numpy
# The workspace package provides local copies of the deap modules (base,
# creator, algorithms, tools, and the deap.tools submodules support,
# selection, mutation, crossover).
from workspace import base, creator, algorithms, tools, support, selection, mutation, crossover
from math import floor, pow
from isolation.isolation import Action

atr_min = 0   # Minimum bound for an attribute (gene) value
atr_max = 8   # Maximum (exclusive) bound for an attribute (gene) value
ind_size = 3  # Individual size (i.e., search depth): genes alternate between the
              # two players, so the first gene is the current player's next move,
              # the second is the opponent's reply, the third is the current
              # player's follow-up, and so on
mut_pb = 0.05  # Per-gene (independent) mutation probability
gen_size = 1   # Number of generations (i.e., iterations)
pop_size = 30  # Population size per generation; alternatively floor(0.5 * pow(atr_max, ind_size))
sel_size = 10  # Number of individuals to be selected
CXPB, MUTPB = 0.5, 0.2  # Per-individual crossover and mutation probabilities

# Map integer genes onto the eight knight moves.
attrAction = {0: Action.NNE, 1: Action.ENE, 2: Action.ESE, 3: Action.SSE,
              4: Action.SSW, 5: Action.WSW, 6: Action.WNW, 7: Action.NNW}
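
# Hypothetical helper (illustration only; not used by the agent): with
# ind_size = 3, the genome [0, 3, 5] decodes to [Action.NNE, Action.SSE,
# Action.WSW] -- this player's move, the opponent's reply, and this player's
# follow-up.
def decode(individual):
    """Map a genome of integer genes onto the corresponding knight moves."""
    return [attrAction[gene] for gene in individual]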

def score(state, player_id):
    """Heuristic value of `state` for `player_id`: #my_moves - #opponent_moves."""
    own_loc = state.locs[player_id]
    opp_loc = state.locs[1 - player_id]
    own_liberties = state.liberties(own_loc)
    opp_liberties = state.liberties(opp_loc)
    return len(own_liberties) - len(opp_liberties)
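
# For example, if this player's knight has 5 open squares to move to and the
# opponent's has 3, score() returns +2; positive values favor player_id.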

# The goal ('fitness') function to be maximized.
def evaluate(individual, state, player_id, attrAction, idx=0):
    """Return the value of `individual`, i.e., of the move sequence it
    suggests for the active player and the opponent, by replaying its
    genes from `state`.
    """
    # Check for a finished game before checking move legality, so that a won
    # position reached mid-genome is credited with its utility instead of
    # being rejected as an illegal move.
    if state.terminal_test():
        value = state.utility(player_id)
    elif attrAction[individual[idx]] not in state.actions():
        value = float("-inf")  # Treat an illegal move as a loss
    elif idx >= len(individual) - 1:
        value = score(state, player_id)
    else:
        value = evaluate(individual, state.result(attrAction[individual[idx]]),
                         player_id, attrAction, idx + 1)
    # Return a tuple at the top level (as required by the deap library);
    # otherwise, return the bare fitness value.
    if idx == 0:
        return (value,)
    return value
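
# Worked example (illustrative): the genome [1, 4, 2] proposes Action.ENE for
# this player, Action.SSW for the opponent, then Action.ESE for this player
# again. evaluate() replays those plies via state.result(), returning score()
# of the state reached before the final gene, state.utility() if the game ends
# along the way, or -inf as soon as a proposed move is illegal.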

class CustomPlayer(DataPlayer):
    """ Implement your own agent to play knight's Isolation

    The get_action() method is the only required method for this project.
    You can modify the interface for get_action by adding named parameters
    with default values, but the function MUST remain compatible with the
    default interface.

    **********************************************************************
    NOTES:
    - The test cases will NOT be run on a machine with GPU access, nor be
      suitable for using any other machine learning techniques.
    - You can pass state forward to your agent on the next turn by assigning
      any pickleable object to the self.context attribute.
    **********************************************************************
    """
    def __init__(self, player_id):
        super().__init__(player_id)
        # NOTE: creator.create() registers the classes globally, so they are
        # re-created on every instantiation; deap would warn about this.
        creator.create("FitnessMax", base.Fitness, weights=(1.0,))
        creator.create("Individual", list, fitness=creator.FitnessMax)
        self.toolbox = base.Toolbox()
        # Attribute generator: a random gene in [atr_min, atr_max)
        self.toolbox.register("attr_action", random.randrange, atr_max)
        # Structure initializers
        self.toolbox.register("individual", tools.initRepeat, creator.Individual,
                              self.toolbox.attr_action, ind_size)
        # Define the population to be a list of individuals
        self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
        # Statistics and hall-of-fame objects
        self.hof = support.HallOfFame(1)
        self.stats = support.Statistics(lambda ind: ind.fitness.values)
        self.stats.register("avg", numpy.mean)
        self.stats.register("std", numpy.std)
        self.stats.register("min", numpy.min)
        self.stats.register("max", numpy.max)
        # ----------
        # Operator registration
        # ----------
        # Register the goal / fitness function
        self.toolbox.register("evaluate", evaluate)
        # Register the crossover operator
        self.toolbox.register("mate", crossover.cxTwoPoint)
        # Register a mutation operator with the specified per-gene probability
        self.toolbox.register("mutate", mutation.mutUniformInt, low=atr_min, up=atr_max - 1, indpb=mut_pb)
        # Tournament selection of the requested number of individuals
        self.toolbox.register("select", selection.selTournament, tournsize=3)

    def get_action(self, state):
        """ Employ an adversarial search technique to choose an action
        available in the current state.

        This method must call self.queue.put(ACTION) at least once, and may
        call it as many times as you want; the caller will be responsible
        for cutting off the function after the search time limit has expired.

        See RandomPlayer and GreedyPlayer in sample_players for more examples.

        **********************************************************************
        NOTE:
        - The caller is responsible for cutting off search, so calling
          get_action() from your own code will create an infinite loop!
          Refer to (and use!) the Isolation.play() function to run games.
        **********************************************************************
        """
        # Randomly select a move as player 1 or 2 on an empty board; otherwise,
        # queue the sub-optimal move found by a genetic algorithm searching at a
        # fixed depth equal to the individual size.
        if state.ply_count < 2:
            self.queue.put(random.choice(state.actions()))
        else:
            # Create an initial population of individuals, where each individual
            # is a list of integers and each integer encodes an action; the seed
            # is fixed for reproducibility.
            random.seed(64)
            pop = self.toolbox.population(n=pop_size)
            pop = algorithms.eaSimple_mod(pop, self.toolbox, CXPB, MUTPB, gen_size,
                                          state, stats=self.stats, player_id=self.player_id,
                                          attrAction=attrAction, halloffame=self.hof, verbose=True)
            if len(self.hof) != 0 and self.hof[0].fitness.values[0] != float("-inf"):
                # Queue the first gene of the best individual, i.e., the
                # expected best next move.
                self.queue.put(attrAction.get(self.hof[0][0]))
            elif state.actions():
                self.queue.put(random.choice(state.actions()))
            else:
                print("No more solutions left")
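
if __name__ == "__main__":
    # Minimal, self-contained sketch of the same GA loop using the standard
    # deap package (assumed installed via `pip install deap`), independent of
    # the game: it maximizes the sum of a genome's genes. The agent above
    # follows this exact flow, but swaps in the Isolation-aware evaluate()
    # and the modified eaSimple_mod from the local workspace modules.
    from deap import algorithms as deap_algorithms
    from deap import base as deap_base, creator as deap_creator, tools as deap_tools

    deap_creator.create("DemoFitnessMax", deap_base.Fitness, weights=(1.0,))
    deap_creator.create("DemoIndividual", list, fitness=deap_creator.DemoFitnessMax)
    tb = deap_base.Toolbox()
    tb.register("attr_action", random.randrange, atr_max)
    tb.register("individual", deap_tools.initRepeat, deap_creator.DemoIndividual,
                tb.attr_action, ind_size)
    tb.register("population", deap_tools.initRepeat, list, tb.individual)
    tb.register("evaluate", lambda ind: (sum(ind),))  # toy fitness: gene sum
    tb.register("mate", deap_tools.cxTwoPoint)
    tb.register("mutate", deap_tools.mutUniformInt, low=atr_min, up=atr_max - 1, indpb=mut_pb)
    tb.register("select", deap_tools.selTournament, tournsize=3)

    demo_hof = deap_tools.HallOfFame(1)
    demo_pop = tb.population(n=pop_size)
    demo_pop, _log = deap_algorithms.eaSimple(demo_pop, tb, CXPB, MUTPB, ngen=5,
                                              halloffame=demo_hof, verbose=False)
    print("best toy genome:", demo_hof[0], "fitness:", demo_hof[0].fitness.values[0])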