Skip to content

Commit

Permalink
Boost
Browse files Browse the repository at this point in the history
  • Loading branch information
andrzejnovak committed Jun 19, 2018
1 parent f96cec9 commit bbc596c
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 30 deletions.
2 changes: 1 addition & 1 deletion DataCollection.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ def createTestDataForDataCollection(
import copy
self.readFromFile(collectionfile)
self.dataclass.remove=False
self.dataclass.weight=True #False
self.dataclass.weight=False #True #False
self.readRootListFromFile(inputfile)
self.createDataFromRoot(
self.dataclass, outputDir, False,
Expand Down
2 changes: 1 addition & 1 deletion TrainData.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,7 @@ def produceBinWeighter(self,filenames):
if self.remove or self.weight:
for fname in filenames:
nparray = self.readTreeFromRootToTuple(fname, branches=branches)
weighter.addDistributions(nparray)
weighter.addDistributions(nparray, referenceclass=self.referenceclass)
del nparray
showprog.show(counter)
counter=counter+1
Expand Down
92 changes: 64 additions & 28 deletions Weighter.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def setBinningAndClasses(self,bins,nameX,nameY,classes):
if len(self.classes)<1:
self.classes=['']

def addDistributions(self,Tuple):
def addDistributions(self,Tuple, referenceclass="flatten"):
import numpy
selidxs=[]

Expand All @@ -79,7 +79,11 @@ def addDistributions(self,Tuple):


for i in range(len(self.classes)):
tmphist,xe,ye=numpy.histogram2d(xtuple[selidxs[i]],ytuple[selidxs[i]],[self.axisX,self.axisY],normed=True)
if not referenceclass=="lowest":
tmphist,xe,ye=numpy.histogram2d(xtuple[selidxs[i]],ytuple[selidxs[i]],[self.axisX,self.axisY],normed=True)
else:
tmphist,xe,ye=numpy.histogram2d(xtuple[selidxs[i]],ytuple[selidxs[i]],[self.axisX,self.axisY])
#print(self.classes[i], xtuple[selidxs[i]], len(xtuple[selidxs[i]]))
self.xedges=xe
self.yedges=ye
if len(self.distributions)==len(self.classes):
Expand Down Expand Up @@ -117,7 +121,7 @@ def plotHist(hist,outname):
def createRemoveProbabilitiesAndWeights(self,referenceclass='isB'):
import numpy
referenceidx=-1
if not referenceclass=='flatten':
if referenceclass not in ['flatten', 'lowest']:
try:
referenceidx=self.classes.index(referenceclass)
except:
Expand Down Expand Up @@ -151,35 +155,41 @@ def divideHistos(a,b):

probhists=[]
weighthists=[]


bin_counts = []
for i in range(len(self.classes)):
bin_counts.append(self.distributions[i])
bin_min = numpy.array(numpy.minimum.reduce(bin_counts))
print("Minimimum events per bin")
print(bin_min)
print ("Weights:")
for i in range(len(self.classes)):
#print(self.classes[i])
tmphist=self.distributions[i]
#print(tmphist)
#print(refhist)
if numpy.amax(tmphist):
tmphist=tmphist/numpy.amax(tmphist)
else:
print('Warning: class '+self.classes[i]+' empty.')
ratio=divideHistos(refhist,tmphist)
ratio=ratio/numpy.amax(ratio)#norm to 1
#print(ratio)
if referenceclass=="lowest":
ratio=divideHistos(bin_min,tmphist)
else:
if numpy.amax(tmphist):
tmphist=tmphist/numpy.amax(tmphist)
else:
print('Warning: class '+self.classes[i]+' empty.')
ratio=divideHistos(refhist,tmphist)
ratio=ratio/numpy.amax(ratio)#norm to 1
ratio[ratio<0]=1
ratio[ratio==numpy.nan]=1
weighthists.append(ratio)
ratio=1-ratio#make it a remove probability
probhists.append(ratio)

print(self.classes[i])
print(ratio)
self.removeProbabilties=probhists
self.binweights=weighthists

#make it an average 1
for i in range(len(self.binweights)):
self.binweights[i]=self.binweights[i]/numpy.average(self.binweights[i])
#for i in range(len(self.binweights)):
# self.binweights[i]=self.binweights[i]/numpy.average(self.binweights[i])




def createNotRemoveIndices(self,Tuple):
import numpy
if len(self.removeProbabilties) <1:
Expand All @@ -193,39 +203,62 @@ def createNotRemoveIndices(self,Tuple):
xaverage=[]
norm=[]
yaverage=[]

count_out, count_rem = 0, 0

useonlyoneclass=len(self.classes)==1 and len(self.classes[0])==0


# print(self.classes )
for c in self.classes:
xaverage.append(0)
xaverage.append(0)
norm.append(0)
yaverage.append(0)



# print(self.removeProbabilties)
incomplete_class_phasespace = False
for jet in iter(Tuple[self.Axixandlabel]):
#print(jet)
binX = self.getBin(jet[self.nameX], self.axisX)
binY = self.getBin(jet[self.nameY], self.axisY)

out, rem = False, False
for index, classs in enumerate(self.classes):
# if counter < 5:
# print(index, classs, jet[classs])
# As you iterate over classes, produce index for when label is True
if useonlyoneclass or 1 == jet[classs]:
rand=numpy.random.ranf()
prob = self.removeProbabilties[index][binX][binY]

#print (prob)

#if counter < 5: print(rand, prob)
if jet[self.nameX] < self.axisX[0] or jet[self.nameY] < self.axisY[0] or jet[self.nameX] > self.axisX[-1] or jet[self.nameY] > self.axisY[-1]:
notremove[counter]=0
# will remove out of bounds events
out = True
# will remove out of bounds event
elif rand < prob and index != self.refclassidx:
notremove[counter]=0
rem = True
else:
#print('keep',rand,prob,index,self.refclassidx,jet[classs],classs,jet[self.nameX],jet[self.nameY],binX,binY)
notremove[counter]=1
xaverage[index]+=jet[self.nameX]
yaverage[index]+=jet[self.nameY]
norm[index]+=1
counter=counter+1


counter +=1
# If no label is True, remove event as undefined
if sum([jet[classs] for classs in self.classes])==0:
notremove[counter]=0
counter +=1
incomplete_class_phasespace
#if counter > 5: break
if out: count_out +=1
if rem: count_rem +=1
#counter=counter+1
print('Outside of bins: {} % , Randomly removed: {} %'.format(round(count_out/float(counter)*100), round(count_rem/float(counter)*100)) )

if incomplete_class_phasespace:
print("WARNING: Defined truth classes don't sum up to 1 in probability")
if not len(notremove) == counter:
raise Exception("tuple length must match remove indices length. Probably a problem with the definition of truth classes in the ntuple and the TrainData class")

Expand All @@ -252,7 +285,10 @@ def getJetWeights(self,Tuple):

for index, classs in enumerate(self.classes):
if 1 == jet[classs] or useonlyoneclass:
weight[jetcount]=(self.binweights[index][binX][binY])
if jet[self.nameX] < self.axisX[0] or jet[self.nameY] < self.axisY[0] or jet[self.nameX] > self.axisX[-1] or jet[self.nameY] > self.axisY[-1]:
weight[jetcount]=0
else:
weight[jetcount]=(self.binweights[index][binX][binY])

jetcount=jetcount+1

Expand Down

0 comments on commit bbc596c

Please sign in to comment.