-
Notifications
You must be signed in to change notification settings - Fork 0
/
2.CANDIDATE ELIMINATION ALGORITHM
226 lines (226 loc) · 7.51 KB
/
2.CANDIDATE ELIMINATION ALGORITHM
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
class Holder:
factors={} #Initialize an empty dictionary
attributes = () #declaration of dictionaries parameters with an arbitrary length
'''
Constructor of class Holder holding two parameters,
self refers to the instance of the class
'''
def __init__(self,attr): #
self.attributes = attr
for i in attr:
self.factors[i]=[]
def add_values(self,factor,values):
self.factors[factor]=values
class CandidateElimination:
Positive={} #Initialize positive empty dictionary
Negative={} #Initialize negative empty dictionary
def __init__(self,data,fact):
self.num_factors = len(data[0][0])
self.factors = fact.factors
self.attr = fact.attributes
self.dataset = data
def run_algorithm(self):
'''
Initialize the specific and general boundaries, and loop the dataset against the
algorithm
'''
G = self.initializeG()
S = self.initializeS()
'''
Programmatically populate list in the iterating variable trial_set
'''
count=0
for trial_set in self.dataset:
if self.is_positive(trial_set): #if trial set/example consists of positive examples
G = self.remove_inconsistent_G(G,trial_set[0]) #remove inconsitent data from
the general boundary
S_new = S[:] #initialize the dictionary with no key-value pair
print (S_new)
for s in S:
if not self.consistent(s,trial_set[0]):
S_new.remove(s)
generalization = self.generalize_inconsistent_S(s,trial_set[0])
generalization = self.get_general(generalization,G)
if generalization:
S_new.append(generalization)
S = S_new[:]
S = self.remove_more_general(S)
print(S)
else:#if it is negative
S = self.remove_inconsistent_S(S,trial_set[0]) #remove inconsitent data from
the specific boundary
G_new = G[:] #initialize the dictionary with no key-value pair (dataset can
take any value)
print (G_new)
for g in G:
if self.consistent(g,trial_set[0]):
G_new.remove(g)
specializations = self.specialize_inconsistent_G(g,trial_set[0])
specializationss = self.get_specific(specializations,S)
if specializations != []:
G_new += specializationss
G = G_new[:]
G = self.remove_more_specific(G)
print(G)
print (S)
print (G)
def initializeS(self):
''' Initialize the specific boundary '''
S = tuple(['-' for factor in range(self.num_factors)]) #6 constraints in the vector
return [S]
def initializeG(self):
''' Initialize the general boundary '''
G = tuple(['?' for factor in range(self.num_factors)]) # 6 constraints in the vector
return [G]
def is_positive(self,trial_set):
''' Check if a given training trial_set is positive '''
if trial_set[1] == 'Y':
return True
elif trial_set[1] == 'N':
return False
else:
raise TypeError("invalid target value")
def match_factor(self,value1,value2):
''' Check for the factors values match,
necessary while checking the consistency of
training trial_set with the hypothesis '''
if value1 == '?' or value2 == '?':
return True
elif value1 == value2 :
return True
return False
def consistent(self,hypothesis,instance):
''' Check whether the instance is part of the hypothesis '''
for i,factor in enumerate(hypothesis):
if not self.match_factor(factor,instance[i]):
return False
return True
def remove_inconsistent_G(self,hypotheses,instance):
''' For a positive trial_set, the hypotheses in G
inconsistent with it should be removed '''
G_new = hypotheses[:]
for g in hypotheses:
if not self.consistent(g,instance):
G_new.remove(g)
return G_new
def remove_inconsistent_S(self,hypotheses,instance):
''' For a negative trial_set, the hypotheses in S
inconsistent with it should be removed '''
S_new = hypotheses[:]
for s in hypotheses:
if self.consistent(s,instance):
S_new.remove(s)
return S_new
def remove_more_general(self,hypotheses):
''' After generalizing S for a positive trial_set, the hypothesis in S
general than others in S should be removed '''
S_new = hypotheses[:]
for old in hypotheses:
for new in S_new:
if old!=new and self.more_general(new,old):
S_new.remove[new]
return S_new
def remove_more_specific(self,hypotheses):
''' After specializing G for a negative trial_set, the hypothesis in G
specific than others in G should be removed '''
G_new = hypotheses[:]
for old in hypotheses:
for new in G_new:
if old!=new and self.more_specific(new,old):
G_new.remove[new]
return G_new
def generalize_inconsistent_S(self,hypothesis,instance):
''' When a inconsistent hypothesis for positive trial_set is seen in the specific
boundary S,
it should be generalized to be consistent with the trial_set ... we will get one
hypothesis'''
hypo = list(hypothesis) # convert tuple to list for mutability
for i,factor in enumerate(hypo):
if factor == '-':
hypo[i] = instance[i]
elif not self.match_factor(factor,instance[i]):
hypo[i] = '?'
generalization = tuple(hypo) # convert list back to tuple for immutability
return generalization
def specialize_inconsistent_G(self,hypothesis,instance):
''' When a inconsistent hypothesis for negative trial_set is seen in the general
boundary G
should be specialized to be consistent with the trial_set.. we will get a set of
hypotheses '''
specializations = []
hypo = list(hypothesis) # convert tuple to list for mutability
for i,factor in enumerate(hypo):
if factor == '?':
values = self.factors[self.attr[i]]
for j in values:
if instance[i] != j:
hyp=hypo[:]
hyp[i]=j
hyp=tuple(hyp) # convert list back to tuple for immutability
specializations.append(hyp)
return specializations
def get_general(self,generalization,G):
''' Checks if there is more general hypothesis in G
for a generalization of inconsistent hypothesis in S
in case of positive trial_set and returns valid generalization '''
for g in G:
if self.more_general(g,generalization):
return generalization
return None
def get_specific(self,specializations,S):
''' Checks if there is more specific hypothesis in S
for each of hypothesis in specializations of an
inconsistent hypothesis in G in case of negative trial_set
and return the valid specializations'''
valid_specializations = []
for hypo in specializations:
for s in S:
if self.more_specific(s,hypo) or s==self.initializeS()[0]:
valid_specializations.append(hypo)
return valid_specializations
def exists_general(self,hypothesis,G):
'''Used to check if there exists a more general hypothesis in
general boundary for version space'''
for g in G:
if self.more_general(g,hypothesis):
return True
return False
def exists_specific(self,hypothesis,S):
'''Used to check if there exists a more specific hypothesis in
general boundary for version space'''
for s in S:
if self.more_specific(s,hypothesis):
return True
return False
def more_general(self,hyp1,hyp2):
''' Check whether hyp1 is more general than hyp2 '''
hyp = zip(hyp1,hyp2)
for i,j in hyp:
if i == '?':
continue
elif j == '?':
if i != '?':
return False
elif i != j:
return False
else:
continue
return True
def more_specific(self,hyp1,hyp2):
''' hyp1 more specific than hyp2 is
equivalent to hyp2 being more general than hyp1 '''
return self.more_general(hyp2,hyp1)
dataset=[(('sunny','warm','normal','strong','warm','same'),'Y'),(('sunny','warm','high','stron
g','warm','same'),'Y'),(('rainy','cold','high','strong','warm','change'),'N'),(('sunny','warm','hi
gh','strong','cool','change'),'Y')]
attributes =('Sky','Temp','Humidity','Wind','Water','Forecast')
f = Holder(attributes)
f.add_values('Sky',('sunny','rainy','cloudy')) #sky can be sunny rainy or cloudy
f.add_values('Temp',('cold','warm')) #Temp can be sunny cold or warm
f.add_values('Humidity',('normal','high')) #Humidity can be normal or high
f.add_values('Wind',('weak','strong')) #wind can be weak or strong
f.add_values('Water',('warm','cold')) #water can be warm or cold
f.add_values('Forecast',('same','change')) #Forecast can be same or change
a = CandidateElimination(dataset,f) #pass the dataset to the algorithm class and call the
run algoritm method
a.run_algorithm()