diff --git a/README.md b/README.md index 7a99549..508e1ab 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ repo for object-oriented datacard maker Writes Datacards Current structure: -- src/datacardMaker.py: Wrapper to automatically create data cards from category/process/systematic objects +- src/datacardMaker.py: Wrapper to automatically create data cards from analysis objects +- src/analysisObject.py: Container for categoryObjects - src/categoryObject.py: Object for categories that contains processes - src/processObject.py: Object for process (e.g. ttH_hbb), contains uncertainties corresponding to the process - src/systematicObject.py: object for nuisance parameters (e.g. bgnorm_ttbarPlusBBbar) diff --git a/base/identificationLogic.py b/base/identificationLogic.py index a7897db..0af9d1e 100644 --- a/base/identificationLogic.py +++ b/base/identificationLogic.py @@ -1,4 +1,3 @@ -from sys import exit class identificationLogic(object): """ The identificationLogic class is meant to handle all logic concerning @@ -150,9 +149,6 @@ def insert_channel(self, channel_name, base_key): print "-"*130, "\nDEBUG: key after channel insertion:", s print "-"*130 return s - else: - print "UPS" - exit(0) return base_key def insert_process(self, process_name, base_key): diff --git a/base/valueConventions.py b/base/valueConventions.py index a9dff79..e6abf90 100644 --- a/base/valueConventions.py +++ b/base/valueConventions.py @@ -10,6 +10,8 @@ class valueConventions(object): def __init__(self): print "Initializing valueConventions" + self._allowed_types = ["lnN", "shape"] + self._debug = 99 def isfloat(self, value): try: @@ -34,4 +36,11 @@ def is_good_systval(self, value): is_good = all(self.isfloat(v) for v in totest) if not is_good: print "Given value not suitable for an uncertainty in a datacard!" 
- return is_good \ No newline at end of file + return is_good + + def is_allowed_type(self, typ): + if typ in self._allowed_types: + return True + if self._debug >= 99: + print "ERROR: Uncertainty type '%s' is not allowed" % typ + return False \ No newline at end of file diff --git a/src/README.md b/src/README.md index 375c015..33a293b 100644 --- a/src/README.md +++ b/src/README.md @@ -1,12 +1,12 @@ # datacardMaker src classes for object-oriented datacard maker -- datacardMaker.py: Wrapper to automatically create data cards from category/process/systematic objects - - a datacard maker object is able to have multiple categories with multiple processes with corresponding systematics +- datacardMaker.py: Wrapper to automatically create data cards from analysis objects + - write Datacard with write_datacard() +- analysisObject.py: Container for categoryObjects + - an analysis object is able to have multiple categories with multiple processes with corresponding systematics - uses category, process and systematic objects - add category with add_category() - - write Datacard with write_datacard() - - read Datacard with load_from_file() - categoryObject.py: object for categories - a category has multiple processes with a number of systematic uncertainties - categoryObject knows generic keys for files and histogram names for data and uncertainties diff --git a/src/analysisObject.py b/src/analysisObject.py index c0fdf6f..a53385c 100644 --- a/src/analysisObject.py +++ b/src/analysisObject.py @@ -13,7 +13,7 @@ class analysisObject(object): _debug = 200 def init_variables(self): - self._categories = {} + self._categories = {} self._systematics = {} def __init__( self, pathToDatacard = "", @@ -22,88 +22,191 @@ def __init__( self, pathToDatacard = "", systIdentifier = "$SYSTEMATIC"): self.init_variables() if pathToDatacard: - self.load_from_file(pathToDatacard) + self.load_from_datacard(pathToDatacard) + + @property + def systematics(self): + """ + get dictionary of systematics for the 
analysisObject + """ + self.update_systematics() + return self._systematics + + @property + def categories(self): + """ + get dictionary of categories for the analysisObject + """ + return self._categories def add_category(self, category): - """ - Adds a category object - """ + """ + Adds a category object + """ if isinstance(category, categoryObject): catname = category.name if not catname in self._categories: self._categories[catname] = category # self.update_systematics(category = category) else: - print "ERROR: Category %s is known to this datacard!" % catname + print "ERROR: Category %s is known to this analysisObject!" % catname else: print "ERROR: Input required to be instance of categoryObject!" - def create_category(self,categoryName,default_file=None,generic_key_systematic_hist=None, - generic_key_nominal_hist=None): + def create_category(self,categoryName,default_file=None, + generic_key_systematic_hist=None, + generic_key_nominal_hist=None): """ Initializes a categoryObject with default file and generic keys. 
""" self._categories[categoryName] = categoryObject(categoryName=categoryName, - defaultRootFile=default_file,systkey=generic_key_systematic_hist, - defaultnominalkey=generic_key_nominal_hist) + defaultRootFile=default_file, + systkey=generic_key_systematic_hist, + defaultnominalkey=generic_key_nominal_hist) if self._debug >= 50: print "initialized category", categoryName print self._categories[categoryName] + def delete_category(self,categoryName): + if isinstance(categoryName,list): + self.delete_categories(list_of_categories=categoryName) + elif isinstance(categoryName,str): + if categoryName in self._categories: + del self._categories[categoryName] + else: + if self._debug>30: + print "DEBUG: Category %s doesnt exist" % categoryName + + def delete_categories(self,list_of_categories): + for category in list_of_categories: + self.delete_category(categoryName=category) + + def add_signal_process(self, process, categoryName=None): - """ - Adds a signal process and creates a categoryObject where the processObject is stored - if there is no categoryObject. - """ - if categoryName==None: - categoryName=process.category - if not categoryName in self._categories: - self.create_category(categoryName=categoryName) - self._categories[category].add_signal_process(process) - - def add_background_process(self, process, categoryName=None): - """ - Adds a background process and creates a categoryObject where the processObject is stored - if there is no categoryObject. - """ - if categoryName==None: - categoryName=process.category - if not categoryName in self._categories: - self.create_category(categoryName=categoryName) - self._categories[category].add_background_process(process) - - - def create_process(self,categoryName,processName,processType,file=None,key_nominal_hist=None,key_systematic_hist=None): + """ + Adds a signal process and creates a categoryObject where the processObject is stored + if there is no categoryObject. Maps to the categoryObject function. 
+ Logic if process is already known is found there. + """ + if categoryName is None: + categoryName=process.category + if categoryName in self._categories: + self._categories[categoryName].add_signal_process(process) + else: + print "ERROR: No category %s in analysisObject" % categoryName + + def create_signal_process(self,categoryName,processName, + file=None,key_nominal_hist=None, + key_systematic_hist=None): """ Adds a signal or background process dependant on the value x of the processType - (x<=0 signal process, x>=1 background process) - If no list of file and key names is handed over, it uses the default information of the category object - to initialize a process. + (x<=0 signal process, x>=1 background process) + Maps to the categoryObjects function. + If no list of file and key names is handed over, it uses the default information + of the category object in the categoryObject to initialize a process. """ if self._debug>100: print key_nominal_hist print key_systematic_hist - if int(processType)<=0: - self._categories[categoryName].create_signal_process(processName=processName, - rootfile=file,histoname=key_nominal_hist,systkey=key_systematic_hist) - print "-"*130 - print "DEBUG: created process", processName - print self._categories[categoryName][processName] - print "-"*130 - elif int(processType)>0: - self._categories[categoryName].create_background_process(processName=processName, - rootfile=file,histoname=key_nominal_hist,systkey=key_systematic_hist) + + self._categories[categoryName].create_signal_process(processName=processName, + rootfile=file,histoname=key_nominal_hist,systkey=key_systematic_hist) + + if self._debug >= 50: + print "initialized process", processName, "in category", categoryName + print self._categories[categoryName] + def add_background_process(self, process, categoryName=None): + """ + Adds a background process and creates a categoryObject where the processObject is stored + if there is no categoryObject. 
Maps to the categoryObject function. + Logic if process is already known is found there. + """ + if categoryName is None: + categoryName=process.category + if categoryName in self._categories: + self._categories[categoryName].add_background_process(process) + else: + print "ERROR: No category %s in analysisObject" % categoryName + + + def create_background_process(self,categoryName,processName, + file=None,key_nominal_hist=None, + key_systematic_hist=None): + """ + Adds a signal or background process dependant on the value x of the processType + (x<=0 signal process, x>=1 background process) + Maps to the categoryObjects function. + If no list of file and key names is handed over, it uses the default information + of the category object in the categoryObject to initialize a process. + """ + if self._debug>100: + print key_nominal_hist + print key_systematic_hist + + self._categories[categoryName].create_background_process(processName=processName, + rootfile=file,histoname=key_nominal_hist,systkey=key_systematic_hist) + + if self._debug >= 50: + print "initialized process", processName, "in category", categoryName + print self._categories[categoryName] + def delete_processes_for_all_categories(self,list_of_processes): + for process in list_of_processes: + self.delete_process_for_all_categories(processName=process) + + def delete_process_for_all_categories(self,processName): + if isinstance(processName,list): + self.delete_processes_for_all_categories(list_of_processes=processName) + elif isinstance(processName,str): + for category in self._categories: + self._categories[category].delete_process(processName=processName) + + + def delete_uncertainty_for_all_processes(self,systematic): + if isinstance(systematic,list): + self.delete_uncertainties_for_all_processes(list_of_systematics=systematic) + elif isinstance(systematic,str): + for category in self._categories: + for process in self._categories[category]: + 
self._categories[category][process].delete_uncertainty(systematicName=systematic) + if systematic in self._systematics: + del self._systematics[systematic] + + def delete_uncertainties_for_all_processes(self,list_of_systematics): + for systematic in list_of_systematics: + self.delete_uncertainty_for_all_processes(systematic=systematic) + def update_systematics(self): + self._systematics.clear() + for category in self._categories: + self._collect_uncertainties(process_dict = self._categories[category].signal_processes) + self._collect_uncertainties(process_dict = self._categories[category].background_processes) - def load_from_file(self, pathToDatacard): + + def _collect_uncertainties(self, process_dict): + """ + Loop over all process in the dictionary 'process_dict' and save + the respective systematic uncertainties and correlations + """ + for process_name in process_dict: + process = process_dict[process_name] + for syst in process.uncertainties: + #first, check if uncertainty is already known + #if not, create new systematicsObject + if not syst in self._systematics: + self._systematics[syst] = systematicObject(name = syst, + nature = process.get_uncertainty_type(syst)) + self._systematics[syst].add_process(process = process) + + + """ + Function to create analysisObject from datacard + """ + def load_from_datacard(self, pathToDatacard): """ - Reads datacard from file. Creates categoryObjects for each category and - processObjects for the corresponding processes. + Reads datacard from datacard. Creates categoryObjects for each category + and processObjects for the corresponding processes. Adds filename, nominal histname and systematic histname. Adds systematics for corresponding processes. """ @@ -112,30 +215,34 @@ def load_from_file(self, pathToDatacard): #Read datacard from file. 
with open(pathToDatacard) as datacard: lines = datacard.read().splitlines() - self._shapelines_ = [] - self._systematics_ = [] - self._processes_ = "" - self._binprocesses_ = "" - self._processtype_ = "" - self._header = [] - self._bins = "" - self._observation = "" - + shape_lines = [] + systematic_lines = [] + process_line = "" + categoryprocess_line = "" + processtype_line = "" + categories_line = "" + #header_lines are not used right now + header_lines = [] + #observation_line is not used right now + observation_line = "" + autoMCStats_lines = [] + group_lines = [] + header_identifier = ["#Combination", "imax", "kmax", "jmax"] for n, line in enumerate(lines): #missing lines for advanced datacards, only working for simple ones if line.startswith("-"): continue - elif line.startswith("Combination") or line.startswith("imax") or line.startswith("kmax") or line.startswith("jmax"): - self._header.append(line) + elif any(line.startswith(x) for x in header_identifier): + header_lines.append(line) elif line.startswith("bin") and n != len(lines) and lines[n+1].startswith("observation"): - self._bins = line - self._observation = lines[n+1] + categories_line = line + observation_line = lines[n+1] elif line.startswith("shapes"): - self._shapelines_.append(line) + shape_lines.append(line) elif line.startswith("process") and n != 0 and lines[n-1].startswith("bin"): - self._processes_= line - self._binprocesses_= lines[n-1] - self._processtype_ = lines[n+1] + process_line= line + categoryprocess_line= lines[n-1] + processtype_line = lines[n+1] elif line.startswith("bin") and lines[n+1].startswith("process"): pass elif line.startswith("process") and lines[n+1].startswith("rate"): @@ -143,60 +250,71 @@ elif line.startswith("observation") or line.startswith("rate"): pass elif "autoMCStats" in line: + autoMCStats_lines.append(line) + elif len(line.split()) > 1 and line.split()[1] == "group": + group_lines.append(line) + elif line.startswith("#"): pass else: - 
self._systematics_.append(line) - - #Create categoryObject for each category - #first cleanup lines - categories=self._bins.split() + systematic_lines.append(line) + """ + Create categoryObject for each category + first cleanup lines + """ + categories=categories_line.split() categories.pop(0) - self.load_from_file_add_categories(list_of_categories= categories) + self._load_from_datacard_add_categories(list_of_categories= categories, + list_of_shapelines=shape_lines) - #Create processObjects for each process in a category - #and add it to its correspoding categoryObjects - #first cleanup lines - processes = self._processes_.split() + """ + Create processObjects for each process in a category + and add it to its correspoding categoryObjects + first cleanup lines + """ + processes = process_line.split() processes.pop(0) - categoryprocesses = self._binprocesses_.split() + categoryprocesses = categoryprocess_line.split() categoryprocesses.pop(0) - processtypes = self._processtype_.split() + processtypes = processtype_line.split() processtypes.pop(0) - #checks if file is properly written - assert len(processes)==len(categoryprocesses) - assert len(processes)==len(processtypes) - #add processes to categories - self.load_from_file_add_processes(list_of_categories=categoryprocesses, - list_of_processes=processes, list_of_processtypes=processtypes) - - # #adds systematics to processes - self.load_from_file_add_systematics(list_of_categories=categoryprocesses, - list_of_processes=processes) + + """ + add processes to categories + """ + self._load_from_datacard_add_processes(list_of_categories=categoryprocesses, + list_of_processes=processes, list_of_processtypes=processtypes, + list_of_shapelines=shape_lines) + """ + adds systematics to processes + """ + self._load_from_datacard_add_systematics(list_of_categories=categoryprocesses, + list_of_processes=processes,list_of_systematics=systematic_lines) + else: print "could not load %s: no such file" % pathToDatacard - def 
load_from_file_add_categories(self,list_of_categories): + def _load_from_datacard_add_categories(self,list_of_categories,list_of_shapelines): """ Line for categories: careful with combined categories, key logic wont be working cause channels will be numerated original name in Combination line """ - for shapelines in self._shapelines_: + for shapelines in list_of_shapelines: shape = shapelines.split() category_name = shape[2] process_name = shape[1] - file = shape[3] - histname = shape[4] - systname = shape[5] - if category_name=="*" and process_name=="*": + file = shape[3] + histname = shape[4] + systname = shape[5] + if category_name == "*" and process_name == "*": for category in list_of_categories: self.create_category(categoryName=category, default_file=file,generic_key_systematic_hist=systname, generic_key_nominal_hist=histname) - elif category_name in list_of_categories and process_name== "*": + elif category_name in list_of_categories and process_name == "*": self.create_category(categoryName=category_name, default_file=file,generic_key_systematic_hist=systname, generic_key_nominal_hist=histname) @@ -205,37 +323,56 @@ def load_from_file_add_categories(self,list_of_categories): self._categories[categoryName]=categoryObject(categoryName=category) - def load_from_file_add_processes(self,list_of_processes,list_of_categories,list_of_processtypes): + def _load_from_datacard_add_processes(self,list_of_processes,list_of_categories, + list_of_processtypes,list_of_shapelines): """ Adds processes to the corresponding categories. Initializes process with file and key information. 
""" - for shapelines in self._shapelines_: + for shapelines in list_of_shapelines: shape = shapelines.split() category_name = shape[2] process_name = shape[1] - file = shape[3] - histname = shape[4] - systname = shape[5] - #if the process is explicitly written in the file, initialize process with file and key information of the readout file + file = shape[3] + histname = shape[4] + systname = shape[5] + """ + if the process is explicitly written in the file, + initialize process with file and key information + of the datacard + """ for category,process,processtype in zip(list_of_categories,list_of_processes,list_of_processtypes): - if (category_name==category and process_name ==process) or (category_name=="*" and process_name==process): - self.create_process(categoryName=category, processName=process_name, - processType=processtype, file=file, - key_nominal_hist=histname, key_systematic_hist=systname) - # if the process is not explicitly written in the file, initialize process with - # the generic keys and default file of the corresponding category + if (category_name == category and process_name == process) or (category_name == "*" and process_name == process): + if int(processtype)<=0: + self.create_signal_process(categoryName=category, + processName=process_name,file=file, + key_nominal_hist=histname, + key_systematic_hist=systname) + elif int(processtype)>0: + self.create_background_process(categoryName=category, + processName=process_name, file=file, + key_nominal_hist=histname, key_systematic_hist=systname) + """ + if the process is not explicitly written in the file, + initialize process with the generic keys and default file + of the corresponding category + """ for category,process,processtype in zip(list_of_categories,list_of_processes,list_of_processtypes): if not process in self._categories[category]: - self.create_process(categoryName=category,processName=process,processType=processtype) + if int(processtype)<=0: + 
self.create_signal_process(categoryName=category, + processName=process) + elif int(processtype)>0: + self.create_background_process(categoryName=category, + processName=process) - def load_from_file_add_systematics(self,list_of_categories,list_of_processes): + def _load_from_datacard_add_systematics(self,list_of_categories,list_of_processes,list_of_systematics): """ One line for one systematic, knows type: adds systematic to process with value given in the file """ - for systematics in self._systematics_: + for systematics in list_of_systematics: systematic = systematics.split() sys=systematic[0] typ=systematic[1] @@ -246,4 +383,54 @@ def load_from_file_add_systematics(self,list_of_categories,list_of_processes): self._categories[category][process].add_uncertainty( syst = sys, typ = typ, value = value) + """ + Function to add processes to analysisObject from CSV File + Categories in analysis object have to be declared first! + """ + def load_from_csv_file(self,filename): + if self._categories: + for category in self._categories: + self._categories[category].add_from_csv(filename) + else: + print "-"*130 + print "ERROR: no categories in analysisObject! Create categoryObjects first!" + print "-"*130 + + + def __str__(self): + s = [] + s.append("List of Categories:") + s.append("_"*30) + for category in self._categories: + s.append("%s" % self._categories[category]) + s.append("_"*30) + return "\n".join(s) + + """ + overloaded get, in and for operator to get better access to categories in + analysis object: + self[categoryName] + can also use: + self[categoryName][processName] + and: + self[categoryName][processName][systematicName] + """ + + def __getitem__(self, categoryName): + + if categoryName in self._categories: + return self._categories[categoryName] + else: + print "ERROR: Process not in Category!" 
+ + def __iter__(self): + all_categories=self._categories + return all_categories.__iter__() + + def __contains__(self, categoryName): + if categoryName in self._categories: + return True + else: + return False + diff --git a/src/categoryObject.py b/src/categoryObject.py index 5802e70..35ea8d8 100644 --- a/src/categoryObject.py +++ b/src/categoryObject.py @@ -19,6 +19,10 @@ def init_variables(self): self._key_creator = identificationLogic() self._key_creator.belongs_to = "channel" self._default_file = None + self._autoMCstats = False + self._autoMCstats_threshold = 5 + self._autoMCstats_include_signal = 0 + self._autoMCstats_hist_mode = 1 def __init__( self, categoryName=None, defaultRootFile=None, defaultnominalkey=None, @@ -53,45 +57,21 @@ def __init__( self, categoryName=None, defaultRootFile=None, if path.exists(defaultRootFile): self.default_file = defaultRootFile - - - # #check if process/channel identifiers are in nominal histo key - # self.is_part_of(self._procIden, self._nomkey) - # if self.is_part_of(self._chIden, self._nomkey): - # self._nomkey = self._nomkey.replace(self._chIden, self._name) - - # #check if systematics/process/channel identifiers - # #are in systematics histo key - # if not self.is_part_of(self._systIden, self._systkey): - # print "WARNING: Identifier for systematics not part of SystKey!" 
- # if self.is_part_of(self._chIden, self._systkey): - # self._systkey = self._systkey.replace(self._chIden, self._name) - - - #if a list of signal processes is given, add them with default vals + """ + if a list of signal processes is given, add them with default vals + """ if dict_of_signals: for proc in dict_of_signals: self.add_signal_process(name = proc, rootfile = defaultRootFile) - - #if a list of bkg processes is given, add them with default vals + """ + if a list of bkg processes is given, add them with default vals + """ if dict_of_bkgs: for proc in dict_of_bkgs: self.add_background_process(name = proc, rootfile = defaultRootFile) - # def is_part_of(self, identifier, key): - # if identifier in key: - # if self._debug: - # s = "Identifier '%s' is part of " % identifier - # s += "keyword '%s'" % key - # print s - # return True - # else: - # if self._debug: - # s = "Identifier '%s' is not part of " % identifier - # s += "keyword '%s'" % key - # print s - # return False + @property def n_signal_procs(self): @@ -111,7 +91,6 @@ def name(self, val): @property def observation(self): return self._data_obs - @observation.setter def observation(self, data_obs): if isinstance(data_obs, processObject): @@ -158,7 +137,6 @@ def generic_key_systematic_hist(self, key): @property def default_file(self): return self._default_file - @default_file.setter def default_file(self, filepath): if path.exists(filepath): @@ -166,27 +144,71 @@ def default_file(self, filepath): else: print "ERROR: File '%s' does not exist!" 
% filepath + """ + get and set flag for autoMCstats + """ + @property + def autoMCstats(self): + return self._autoMCstats + @autoMCstats.setter + def autoMCstats(self, flag): + self._autoMCstats=flag + + """ + get and set value for autoMCstats threshold + """ + @property + def autoMCstats_threshold(self): + return self._autoMCstats_threshold + @autoMCstats_threshold.setter + def autoMCstats_threshold(self, value): + self._autoMCstats_threshold = value + + """ + get and set signal include for autoMCstats + """ + @property + def autoMCstats_include_signal(self): + return self._autoMCstats_include_signal + @autoMCstats_include_signal.setter + def autoMCstats_include_signal(self, value): + self._autoMCstats_include_signal = value + + """ + get and set hist mode for autoMCstats + """ + @property + def autoMCstats_hist_mode(self): + return self._autoMCstats_hist_mode + @autoMCstats_hist_mode.setter + def autoMCstats_hist_mode(self, value): + self._autoMCstats_hist_mode = value + + def create_signal_process( self, processName, rootfile = None, histoname = None, systkey = None): """ - add a signal process. Calls function add_process with + add a signal process. Calls function _create_process with list of signal processes """ - self._signalprocs[processName]=self.create_process( processName = processName, + self._create_process(processName = processName, + dic = self._signalprocs, rootfile = rootfile, histoname = histoname, systkey = systkey) def create_background_process( self, processName, rootfile = None, histoname = None, systkey = None): """ - add a background process. Calls function add_process with + add a background process. 
Calls function _create_process with list of background processes - """ - self._bkgprocs[processName]=self.create_process( processName = processName, + """ + self._create_process(processName = processName, + dic=self._bkgprocs, rootfile = rootfile, histoname = histoname, - systkey = systkey) + systkey = systkey) + - def create_process(self, processName , rootfile = None, + def _create_process(self, processName, dic, rootfile = None, histoname = None, systkey = None): categoryName=self.name if histoname is None: @@ -195,14 +217,16 @@ def create_process(self, processName , rootfile = None, systkey = self.generic_key_systematic_hist if rootfile is None: rootfile = self.default_file - print "-"*130 - print "DEBUG PROCESSOBJECT: creating process" - print "histoname =", histoname - print "-"*130 - return processObject(processName=processName, categoryName=categoryName, + if self._debug>99: + print "-"*130 + print "DEBUG PROCESSOBJECT: creating process" + print "histoname =", histoname + print "-"*130 + processObj=processObject(processName=processName, categoryName=categoryName, pathToRootfile = rootfile, nominal_hist_key = histoname, systematic_hist_key = systkey) + self._add_process(dic=dic, process=processObj) def add_signal_process( self, process): @@ -210,7 +234,7 @@ def add_signal_process( self, process): add a signal process. 
Calls function add_process with list of signal processes """ - self.add_process( dic = self._signalprocs, process = process) + self._add_process(dic = self._signalprocs, process = process) def add_background_process( self, process): """ @@ -218,9 +242,9 @@ def add_background_process( self, process): list of background processes """ - self.add_process( dic = self._bkgprocs, process = process) + self._add_process(dic = self._bkgprocs, process = process) - def add_process(self, dic, process): + def _add_process(self, dic, process): if isinstance(process, processObject): if not process.name in dic: if self._debug >= 99: @@ -237,6 +261,24 @@ def add_process(self, dic, process): else: print "ERROR: Category can only contain processes!" + def delete_processes(self,list_of_processes): + for process in list_of_processes: + self.delete_process(processName=process) + if self._debug>30: + print "DEBUG: deleted process %s" % processName + + def delete_process(self,processName): + if processName in self._signalprocs: + del self._signalprocs[processName] + if self._debug>30: + print "DEBUG: deleted process %s in category %s" % (processName,self.name) + elif processName in self._bkgprocs: + del self._bkgprocs[processName] + if self._debug>30: + print "DEBUG: deleted process %s in category %s" % (processName,self.name) + else: + print "ERROR: no process %s found in category %s" % (processName,self.name) + def is_compatible_with_default(self, process): """ @@ -254,39 +296,65 @@ def is_compatible_with_default(self, process): return (nominal_is_compatible and systematic_is_compatible) - def add_from_csv(self,pathToFile,signaltag="ttH"): + def add_from_csv(self,pathToFile,signaltag="ttH",lumi=1.025, bgnorm=1.5): + """ + + """ with open(pathToFile, mode="r") as csv_file: csv_reader = pandas.read_csv(pathToFile, skipinitialspace=True,) processes=list(csv_reader) - #get rid of uncertainty and type entry to get processes + """ + get rid of uncertainty and type entry to get processes + """ 
typ_label=processes[1] processes.pop(1) uncertainty_label=processes[0] processes.pop(0) + """ + initialize processes + """ for process in processes: if not process in self: - temp_process=self.create_process(processName=process) + if signaltag in process: + self.create_signal_process(processName=process) + else: + self.create_background_process(processName=process) else: print "found process", process temp_process = self[process] - + """ + add uncertainties to process + """ for uncertainty,typ,value in zip(csv_reader[uncertainty_label],csv_reader[typ_label],csv_reader[process]): - value = value.replace(" ", "") - typ = typ.replace(" ", "") - uncertainty = uncertainty.replace(" ", "") + value = value.strip() + typ = typ.strip() + uncertainty = uncertainty.strip() if self._debug >= 99: print "DEBUG: adding combination ({0},\t{1},\t{2}) for {3}".format(uncertainty,typ,value, process) + """ + lumi and bgnorm uncertainties can be used as an argument, not set in CSV files + """ if "lumi" in uncertainty and (value == "x" or value == "X"): - value = 1.025 + value = lumi print "changing value to", value elif "bgnorm" in uncertainty and (value == "x" or value == "X"): - value = 1.5 + value = bgnorm print "changing value to", value - temp_process.add_uncertainty(syst=uncertainty,typ=typ,value=value) - if signaltag in process: - self.add_signal_process(temp_process) - else: - self.add_background_process(temp_process) + if not value is "-": + if uncertainty in self[process]._uncertainties: + if self._debug >=30: + print "DEBUG: setting {0} to \t{1} and \t{2} for process {3}".format(uncertainty,typ,value, process) + self[process].set_uncertainty(syst=uncertainty,typ=typ,value=value) + else: + self[process].add_uncertainty(syst=uncertainty,typ=typ,value=value) + + """ + overloaded get, in and for operator to get better access to processes in + category object: + self[processName] + can also do: + self[processName][systematicName] + """ def __getitem__(self, process): diff --git 
a/src/datacardMaker.py b/src/datacardMaker.py index 6654c73..214f304 100644 --- a/src/datacardMaker.py +++ b/src/datacardMaker.py @@ -6,6 +6,7 @@ if not directory in sys.path: sys.path.append(directory) +from analysisObject import analysisObject from categoryObject import categoryObject from processObject import processObject from systematicObject import systematicObject @@ -13,23 +14,24 @@ class datacardMaker(object): _debug = 200 def init_variables(self): - self._header = [] - self._bins = "" - self._observation = "" - self._categories = {} - self._systematics = {} - self._hardcode_numbers = False - self._replace_files = False - self._outputpath = "" self._block_separator = "\n" + "-"*130 + "\n" + - def __init__( self, pathToDatacard = "", + def __init__( self, analysis, + outputpath = "", replacefiles=False, + hardcodenumbers=False, processIdentifier = "$PROCESS", channelIdentifier = "$CHANNEL", systIdentifier = "$SYSTEMATIC"): self.init_variables() - if pathToDatacard: - self.load_from_file(pathToDatacard) + if replacefiles: + self.replace_files = replacefiles + if hardcodenumbers: + self.hardcode_numbers = hardcodenumbers + if outputpath: + self.outputpath = outputpath + self.write_datacard(analysis) + @property def hardcode_numbers(self): @@ -84,240 +86,58 @@ def outputpath(self, outpath): def block_separator(self): return self._block_separator @block_separator.setter - def block_separator(self, sep): - self._block_separator = sep - - - def add_category(self, category): - if isinstance(category, categoryObject): - catname = category.name - if not catname in self._categories: - self._categories[catname] = category - # self.update_systematics(category = category) - else: - print "ERROR: Category %s is known to this datacard!" % catname - else: - print "ERROR: Input required to be instance of categoryObject!" - - - def load_from_file(self, pathToDatacard): - """ - Reads datacard from file. 
Creates categoryObjects for each category and - processObjects for the corresponding processes. - Adds filename, nominal histname and systematic histname. - Adds systematics for corresponding processes. - """ - if path.exists(pathToDatacard): - print "loading datacard from", pathToDatacard - #Read datacard from file. - with open(pathToDatacard) as datacard: - lines = datacard.read().splitlines() - self._shapelines_ = [] - self._systematics_ = [] - self._processes_= "" - self._binprocesses_= "" - self._processtype_ = "" - for n, line in enumerate(lines): - #missing lines for advanced datacards, only working for simple ones - if line.startswith("-"): - continue - elif line.startswith("Combination") or line.startswith("imax") or line.startswith("kmax") or line.startswith("jmax"): - self._header.append(line) - elif line.startswith("bin") and n != len(lines) and lines[n+1].startswith("observation"): - self._bins = line - self._observation = lines[n+1] - elif line.startswith("shapes"): - self._shapelines_.append(line) - elif line.startswith("process") and n != 0 and lines[n-1].startswith("bin"): - self._processes_= line - self._binprocesses_= lines[n-1] - self._processtype_ = lines[n+1] - elif line.startswith("bin") and lines[n+1].startswith("process"): - pass - elif line.startswith("process") and lines[n+1].startswith("rate"): - pass - elif line.startswith("observation") or line.startswith("rate"): - pass - elif "autoMCStats" in line: - pass - else: - self._systematics_.append(line) - - #Create categoryObject for each category - #first cleanup lines - categories=self._bins.split() - categories.pop(0) - self.load_from_file_add_categories(list_of_categories= categories) - - #Create processObjects for each process in a category - #and add it to its correspoding categoryObjects - #first cleanup lines - processes = self._processes_.split() - processes.pop(0) - categoryprocesses = self._binprocesses_.split() - categoryprocesses.pop(0) - processtypes = 
self._processtype_.split() - processtypes.pop(0) - #checks if file is properly written - assert len(processes)==len(categoryprocesses) - assert len(processes)==len(processtypes) - #add processes to categories - self.load_from_file_add_processes(list_of_categories=categoryprocesses, - list_of_processes=processes, list_of_processtypes=processtypes) - - # #adds systematics to processes - self.load_from_file_add_systematics(list_of_categories=categoryprocesses, - list_of_processes=processes) - - - else: - print "could not load %s: no such file" % pathToDatacard + def block_separator(self, seperator): + self._block_separator = seperator - - def load_from_file_add_categories(self,list_of_categories): - """ - Line for categories: careful with combined categories, - key logic wont be working cause channels will be numerated - original name in Combination line - """ - for shapelines in self._shapelines_: - shape = shapelines.split() - category_name = shape[2] - process_name = shape[1] - file = shape[3] - histname = shape[4] - systname = shape[5] - if category_name=="*" and process_name=="*": - for category in list_of_categories: - self.create_category(categoryName=category, - default_file=file,generic_key_systematic_hist=systname, - generic_key_nominal_hist=histname) - elif category_name in list_of_categories and process_name== "*": - self.create_category(categoryName=category_name, - default_file=file,generic_key_systematic_hist=systname, - generic_key_nominal_hist=histname) - for category in list_of_categories: - if not category in self._categories: - self._categories[categoryName]=categoryObject(categoryName=category) - - - - def create_category(self,categoryName,default_file=None,generic_key_systematic_hist=None, - generic_key_nominal_hist=None): + + def write_datacard(self,analysis): """ - Adds a categoryObject with default file and generic keys. 
+ Main function to write datacard from analysis object """ - self._categories[categoryName] = categoryObject(categoryName=categoryName, - defaultRootFile=default_file,systkey=generic_key_systematic_hist, - defaultnominalkey=generic_key_nominal_hist) - if self._debug >= 50: - print "initialized category", categoryName - print self._categories[categoryName] + if not isinstance(analysis,analysisObject): + print "ERROR! No analysisObject!" + return False + text = "" + if self._outputpath and not path.exists(self._outputpath): + text = self.create_datacard_text(analysis=analysis) + elif path.exists(self._outputpath) and self._replace_files: + text = self.create_datacard_text(analysis=analysis) + if not text == "": + with open(self._outputpath, "w") as f: + f.write(text) + else: + print "ERROR: Could not write datacard here:", self._outputpath - def load_from_file_add_processes(self,list_of_processes,list_of_categories,list_of_processtypes): + def create_datacard_text(self,analysis): """ - Adds processes to the corresponding categories. - Initializes process with file and key information. 
+ collect all datacard parts and separates them with the + defined block separator """ - for shapelines in self._shapelines_: - shape = shapelines.split() - category_name = shape[2] - process_name = shape[1] - file = shape[3] - histname = shape[4] - systname = shape[5] - #if the process is explicitly written in the file, initialize process with file and key information of the readout file - for category,process,processtype in zip(list_of_categories,list_of_processes,list_of_processtypes): - if (category_name==category and process_name ==process) or (category_name=="*" and process_name==process): - self.create_process(categoryName=category, processName=process_name, - processType=processtype, file=file, - key_nominal_hist=histname, key_systematic_hist=systname) - # if the process is not explicitly written in the file, initialize process with - # the generic keys and default file of the corresponding category - for category,process,processtype in zip(list_of_categories,list_of_processes,list_of_processtypes): - if not process in self._categories[category]: - self.create_process(categoryName=category,processName=process,processType=processtype) - - - def create_process(self,categoryName,processName,processType,file=None,key_nominal_hist=None,key_systematic_hist=None): + content = [] """ - Adds a signal or background process dependant on the value x of the processType - (x<=0 signal process, x>=1 background process) - If no list of file and key names is handed over, it uses the default information of the category object - to initialize a process. 
+ create the header """ - if self._debug>100: - print key_nominal_hist - print key_systematic_hist - if int(processType)<=0: - self._categories[categoryName].create_signal_process(processName=processName, - rootfile=file,histoname=key_nominal_hist,systkey=key_systematic_hist) - elif int(processType)>0: - self._categories[categoryName].create_background_process(processName=processName, - rootfile=file,histoname=key_nominal_hist,systkey=key_systematic_hist) - - if self._debug >= 50: - print "initialized process", processName, "in category", categoryName - print self._categories[categoryName] - - - def load_from_file_add_systematics(self,list_of_categories,list_of_processes): + content.append(self.create_header(analysis=analysis)) """ - One line for one systematic, knows type: adds systematic to process with - value given in the file + create the keyword block """ - for systematics in self._systematics_: - systematic = systematics.split() - sys=systematic[0] - typ=systematic[1] - systematic.pop(1) - systematic.pop(0) - for value,process,category in zip(systematic,list_of_processes,list_of_categories): - if value!="-": - self._categories[category][process].add_uncertainty( syst = sys, - typ = typ, value = value) - - def get_number_of_procs(self): + content.append(self.create_keyword_block(analysis=analysis)) """ - Get number of processes. Returns 0 if some categories have different - amount of processes! + create observation block """ - num = 0 - for cat in self._categories: - currentnum = self._categories[cat].n_signal_procs - currentnum += self._categories[cat].n_background_procs - if num == 0: num = currentnum - if not num == currentnum: - print "Mismatch! Categories have different number of processes!" 
- num = 0 - break - return num - - def collect_uncertainties(self, process_dict): + content.append(self.create_observation_block(analysis=analysis)) """ - Loop over all process in the dictionary 'process_dict' and save - the respective systematic uncertainties and correlations + create block for systematic variations and corresponding process + and categories """ - for process_name in process_dict: - process = process_dict[process_name] - for syst in process.uncertainties: - #first, check if uncertainty is already known - #if not, create new systematicsObject - if not syst in self._systematics: - self._systematics[syst] = systematicObject(name = syst, - nature = process.get_uncertainty_type(syst)) - self._systematics[syst].add_process(process = process) - - - def update_systematics(self, category): - #does not update only collects first time? - self.collect_uncertainties(process_dict = category.signal_processes) - self.collect_uncertainties(process_dict = category.background_processes) + content.append(self.create_process_block(analysis=analysis)) + content.append(self.create_systematics_block(analysis=analysis)) + + return self._block_separator.join(content) - - def create_header(self): + def create_header(self,analysis): """ Create header for the datacard. The header has the following form: imax -> number of bins @@ -330,33 +150,94 @@ def create_header(self): text2workspace.py to calculate these numbers on the fly. """ header = [] - ncats = "*" - nprocs = "*" - nsysts = "*" - if True: - #if self._hardcode_numbers: - #get number of categories - if len(self._categories) != 0: - ncats = len(self._categories) + number_categories = "*" + number_processes = "*" + number_systematics = "*" + if self._hardcode_numbers: + if len(analysis.categories) != 0: + number_categories = len(analysis.categories) else: print "Could not find categories! 
Cannot hard code 'imax'" #get number of processes - nprocs = self.get_number_of_procs() - 1 - if nprocs == -1: nprocs = "*" + number_processes = self.get_number_of_procs(analysis=analysis) - 1 + if number_processes is -1: number_processes = "*" #get number of systematics - if len(self._systematics) != 0: - nsysts = len(self._systematics) + if len(analysis.systematics) != 0: + number_systematics = len(analysis.systematics) else: print "WARNING: Did not find any systematics!" - header.append("imax {0} number of bins".format(ncats)) - header.append("jmax {0} number of processes minus 1".format(nprocs)) - header.append("kmax {0} number of nuisance parameters".format(nsysts)) + header.append("imax {0} number of bins".format(number_categories)) + header.append("jmax {0} number of processes minus 1".format(number_processes)) + header.append("kmax {0} number of nuisance parameters".format(number_systematics)) return "\n".join(header) + def get_number_of_procs(self,analysis): + """ + Get number of processes. Returns 0 if some categories have different + amount of processes! + """ + number = 0 + for category in analysis.categories: + currentnumber = analysis[category].n_signal_procs + currentnumber += analysis[category].n_background_procs + if number is 0: number = currentnumber + if not number is currentnumber: + print "Mismatch! Categories have different number of processes!" + number = 0 + break + return number + + def create_keyword_block(self,analysis): + """ + Create block with keywords with which to find the systematic variations + for different processes. This block has the following form: + shape $PROCESS $CHANNEL /path/to/rootfile $NOMINAL_KEY $SYST_VAR_KEY + + with + $PROCESS - process name (can be '*' for all processes) + $CHANNEL - category name (can be '*' for all categories) + /path/to/rootfile - path to .root file with templates + $NOMINAL_KEY - key for nominal templates. 
If '*' was used before, + this should contain '$CHANNEL' and/or '$PROCESS' + $SYST_VAR_KEY - key for templates for systematic variations. + The key has to contain '$SYSTEMATIC'. If '*' was + used before, this should contain '$CHANNEL' + and/or '$PROCESS' + """ + size=self.get_max_size([analysis.categories,self.get_bkg_processes(analysis=analysis),self.get_signal_processes(analysis=analysis)]) + size+=5 + sizekeys=self.get_max_size_keys(analysis) + sizekeys+=5 + if self._debug>99: + print "DEBUGGING" + print "-".ljust(50) + print sizekeys + print "-".ljust(50) + + lines = [] + for category in analysis.categories: + lines +=(self.write_keyword_process_lines(category=analysis[category],size=size,sizekeys=sizekeys)) + return "\n".join(lines) + + def write_keyword_process_lines(self,category,size,sizekeys): + line=[] + + #adds the generic key + line.append(self.write_keyword_generic_lines(category=category,sizekeys=sizekeys,size=size)) + #adds the process keys (need to add: only add process key if it doesnt match the generic key) + for process in category: + file=category[process].file + key_nominal_hist=category[process].key_nominal_hist + key_systematic_hist=category[process].key_systematic_hist + if not file=="" and not key_nominal_hist=="" and not key_systematic_hist=="": + line.append(self.write_keyword_block_line(process_name=process,category_name=category.name,file=file, + nominal_key=key_nominal_hist,syst_key=key_systematic_hist,size=size,sizekeys=sizekeys)) + return line + def write_keyword_block_line(self, process_name, category_name, file, nominal_key, syst_key, size, sizekeys): if size<11: @@ -384,30 +265,17 @@ def write_keyword_generic_lines(self, category, size, sizekeys): return line - def write_keyword_process_lines(self,category,size,sizekeys): - line=[] - - #adds the generic key - line.append(self.write_keyword_generic_lines(category=category,sizekeys=sizekeys,size=size)) - #adds the process keys (need to add: only add process key if it doesnt match 
the generic key) - for process in category: - file=category[process].file - key_nominal_hist=category[process].key_nominal_hist - key_systematic_hist=category[process].key_systematic_hist - if not file=="" and not key_nominal_hist=="" and not key_systematic_hist=="": - line.append(self.write_keyword_block_line(process_name=process,category_name=category.name,file=file, - nominal_key=key_nominal_hist,syst_key=key_systematic_hist,size=size,sizekeys=sizekeys)) - return line + - def get_max_size_keys(self): + def get_max_size_keys(self,analysis): keynames=[] - for category_name in self._categories: - category=self._categories[category_name] + for category_name in analysis.categories: + category=analysis[category_name] keynames.append(category.default_file) keynames.append(category.generic_key_nominal_hist) keynames.append(category.generic_key_systematic_hist) for process_name in category: - process=self._categories[category_name][process_name] + process=analysis[category_name][process_name] keynames.append(process.file) keynames.append(process.key_nominal_hist) keynames.append(process.key_systematic_hist) @@ -415,40 +283,10 @@ def get_max_size_keys(self): return len(max(keynames,key=len)) - def create_keyword_block(self): - """ - Create block with keywords with which to find the systematic variations - for different processes. This block has the following form: - shape $PROCESS $CHANNEL /path/to/rootfile $NOMINAL_KEY $SYST_VAR_KEY - - with - $PROCESS - process name (can be '*' for all processes) - $CHANNEL - category name (can be '*' for all categories) - /path/to/rootfile - path to .root file with templates - $NOMINAL_KEY - key for nominal templates. If '*' was used before, - this should contain '$CHANNEL' and/or '$PROCESS' - $SYST_VAR_KEY - key for templates for systematic variations. - The key has to contain '$SYSTEMATIC'. 
If '*' was - used before, this should contain '$CHANNEL' - and/or '$PROCESS' - """ - size=self.get_max_size([self._categories,self.get_bkg_processes(),self.get_signal_processes()]) - size+=5 - sizekeys=self.get_max_size_keys() - sizekeys+=5 - if self._debug>99: - print "DEBUGGING" - print "-".ljust(50) - print sizekeys - print "-".ljust(50) - - lines = [] - for category in self._categories: - lines +=(self.write_keyword_process_lines(category=self._categories[category],size=size,sizekeys=sizekeys)) - return "\n".join(lines) + - def create_observation_block(self): + def create_observation_block(self,analysis): """ Create with observation. The block has the following format: @@ -465,12 +303,12 @@ def create_observation_block(self): lines = [] bins = ["bin"] observation = ["observation"] - for category in self._categories: + for category in analysis.categories: obs=0 value=True bins.append("%s" % category) - data_obs = self._categories[category].observation + data_obs = analysis[category].observation if isinstance(data_obs, processObject): if self._hardcode_numbers: observation.append("-1") @@ -491,22 +329,7 @@ def create_observation_block(self): return "\n".join(lines) - - - def get_signal_processes(self): - #Overwriting for more than 1 category, only working when same processes for all categories - for category in self._categories: - sigprc=sorted(self._categories[category]._signalprocs) - return sigprc - - def get_bkg_processes(self): - #Overwriting for more than 1 category, only working when same processes for all categories - for category in self._categories: - bkgprc=sorted(self._categories[category]._bkgprocs) - return bkgprc - - - def create_process_block(self): + def create_process_block(self,analysis): """ Create the process block of the datacard. It has the following format: bin $CHANNEL_1 $CHANNEL_1 $CHANNEL_2 (...) @@ -523,37 +346,47 @@ def create_process_block(self): THIS IS NOT PART OF THE DATACARD! 
""" - signalprocs=self.get_signal_processes() - bkgprocs=self.get_bkg_processes() + """ + get sorted list of signal processes and backgroundprocesses for + all categories for better readability + """ + signalprocs=self.get_signal_processes(analysis=analysis) + bkgprocs=self.get_bkg_processes(analysis=analysis) lines = [] - #Leaves one bin empty, necessary for systematics block + """ + Leaves one bin empty, necessary for systematics block + """ bins = ["bin",""] process = ["process",""] process_index = ["process",""] rate = ["rate","" ] - for category in self._categories: - #Signal processes first - for number,signal_process in enumerate(signalprocs): - - bins.append("%s" % category) - process.append("%s" % signal_process) - - index=1+number-len(self._categories[category]._signalprocs) - process_index.append("%s" % str(index)) - - rate.append("%s" % str(self._categories[category]._signalprocs[signal_process].eventcount)) - #Same with background processes - for number,bkg_process in enumerate(bkgprocs): - bins.append("%s" % category) - process.append("%s" % bkg_process) - - index=1+number - process_index.append("%s" % str(index)) - rate.append("%s" % str(self._categories[category]._bkgprocs[bkg_process].eventcount)) - size=self.get_max_size([bins,process,self._systematics]) + for category in analysis.categories: + """ + Signal processes first + """ + for number,signal_process in enumerate(signalprocs): + + bins.append("%s" % category) + process.append("%s" % signal_process) + + index=1+number-len(analysis[category].signal_processes) + process_index.append("%s" % str(index)) + + rate.append("%s" % str(analysis[category][signal_process].eventcount)) + """ + Same with background processes + """ + for number,bkg_process in enumerate(bkgprocs): + bins.append("%s" % category) + process.append("%s" % bkg_process) + + index=1+number + process_index.append("%s" % str(index)) + rate.append("%s" % str(analysis[category][bkg_process].eventcount)) + 
size=self.get_max_size([bins,process,analysis.systematics]) size+=5 scaled_bins = [x.ljust(size) for x in bins] @@ -565,7 +398,22 @@ def create_process_block(self): lines.append("".join(scaled_process_index)) lines.append("".join(scaled_rate)) return "\n".join(lines) - + + + + def get_signal_processes(self,analysis): + #Overwriting for more than 1 category, only working when same processes for all categories + for category in analysis.categories: + sigprc=sorted(analysis[category].signal_processes) + return sigprc + + def get_bkg_processes(self,analysis): + #Overwriting for more than 1 category, only working when same processes for all categories + for category in analysis.categories: + bkgprc=sorted(analysis[category].background_processes) + return bkgprc + + def get_max_size(self,liste): templiste=[] @@ -574,7 +422,7 @@ def get_max_size(self,liste): return len(max(templiste,key=len)) - def create_systematics_block(self): + def create_systematics_block(self,analysis): """ Create block for nuisance parameters. Format is as follows: $SYST_1_NAME $SYST_1_TYPE CORRELATION_PROC_1 CORRELATION_PROC_2 (...) 
@@ -589,54 +437,31 @@ def create_systematics_block(self): IMPORTANT: The order of process has to be the same as in the process block """ - signalprocs=self.get_signal_processes() - bkgprocs=self.get_bkg_processes() + signalprocs=self.get_signal_processes(analysis) + bkgprocs=self.get_bkg_processes(analysis) - size=self.get_max_size([signalprocs,bkgprocs,self._systematics,self._categories]) + size=self.get_max_size([signalprocs,bkgprocs,analysis.systematics,analysis.categories]) size+=5 lines = [] - for systematic in self._systematics: + for systematic in analysis.systematics: temp="%s" % systematic.ljust(size) - temp+="%s" % str(self._systematics[systematic].type).ljust(size) - for category in self._categories: - #Signal processes first + temp+="%s" % str(analysis.systematics[systematic].type).ljust(size) + for category in analysis.categories: + """ + Signal processes first, then background processes + """ for number,signal_process in enumerate(signalprocs): - temp += "%s" % str(self._systematics[systematic].get_correlation_raw(process_name=signal_process, + temp += "%s" % str(analysis.systematics[systematic].get_correlation_raw(process_name=signal_process, category_name=category)).ljust(size) for number,bkg_process in enumerate(bkgprocs): - temp += "%s" % str(self._systematics[systematic].get_correlation_raw(process_name=bkg_process, + temp += "%s" % str(analysis.systematics[systematic].get_correlation_raw(process_name=bkg_process, category_name=category)).ljust(size) lines.append(temp) return "\n".join(lines) - def create_datacard_text(self): - #create datacard header - content = [] - for cat in self._categories: - self.update_systematics(self._categories[cat]) - content.append(self.create_header()) - #create keyword block - content.append(self.create_keyword_block()) - #create observation block - content.append(self.create_observation_block()) - #create block with keywords for systematic variations - content.append(self.create_process_block()) - 
content.append(self.create_systematics_block()) - - return self._block_separator.join(content) - - def write_datacard(self): - text = "" - if self._outputpath and not path.exists(self._outputpath): - text = self.create_datacard_text() - elif path.exists(self._outputpath) and self._replace_files: - text = self.create_datacard_text() + - if not text == "": - with open(self._outputpath, "w") as f: - f.write(text) - else: - print "ERROR: Could not write datacard here:", self._outputpath + diff --git a/src/processObject.py b/src/processObject.py index 59b9e8f..665b7ba 100644 --- a/src/processObject.py +++ b/src/processObject.py @@ -65,76 +65,55 @@ def calculate_yield(self): #getter/setter for yields @property def eventcount(self): - return self.get_yield() - @eventcount.setter - def eventcount(self, val): - self.set_yield(val) - - def set_yield(self, val): - """ - set yield for processes to value val - """ - self._eventcount = val - - def get_yield(self): """ get yield for process """ y = self._eventcount if self._debug >= 99: print "returning yield of", y - return y + return y + + @eventcount.setter + def eventcount(self, val): + """ + set yield for processes to value val + """ + self._eventcount = val #logic for process name @property def name(self): - return self.get_name() + """ + get name for process + """ + return self._name @name.setter def name(self, s): - if self._debug >= 99: - print "entered setter for name" - self.set_name(s) - - - def set_name(self, name): """ set process name """ if self._debug >= 20: print "setting name to", name self._name = name - - def get_name(self): - """ - create copy of process name - """ - s = self._name - return s + + @property def category(self): - return self.get_category() - - @category.setter - def category(self, catname): - if self._debug >= 99: - print "entered setter for category" - self.set_category(catname) - # self._categoryname = catname - - def get_category(self): """ get name for category to which this process 
belongs to """ return self._categoryname - def set_category(self, catname): + @category.setter + def category(self, catname): """ set name for category to which this process belongs to """ if self._debug >= 20: print "setting category to", catname self._categoryname = catname + @property def file(self): @@ -188,41 +167,12 @@ def key_systematic_hist(self, key): @property def uncertainties(self): return list(self._uncertainties.keys()) - - def __str__(self): - """ - current setup: print delivers: - - process name - - process yield - - list of nuisance parameters - """ - s = [] - s.append("Process infos:") - s.append("\tname:\t%s" % self.get_name()) - s.append("\tcategory:\t%s" % self.get_category()) - s.append("\trootfile:\t%s" % self._file_handler.filepath) - s.append("\tnominal histname:\t%s" % self._nominalhistname) - s.append("\tsystematic histname:\t%s" % self._systkey) - s.append("\tyield:\t{0}".format(self._eventcount)) - if len(self._uncertainties) != 0: - s.append("\tlist of uncertainties:") - - temp = "\t\t%s" % "uncertainty".ljust(15) - temp += "\t%s" % "type".ljust(10) - temp += "\t%s" % "correlation".ljust(15) - s.append(temp) - s.append("\t\t"+"_"*len(temp.expandtabs())) - for syst in self._uncertainties: - temp = "\t\t%s" % syst.ljust(15) - temp += "\t%s" % self._uncertainties[syst]["type"].ljust(10) - temp += "\t%s" % str(self._uncertainties[syst]["value"]).ljust(15) - s.append(temp) - return "\n".join(s) + def add_uncertainty(self, syst, typ, value): """ add an uncertainty to this process. 
This function checks - - whether there already is an entry for 'systname' + - whether there already is an entry for 'systematicName' - the given value is suitable for a datacard (see valueConventions.is_good_systval) and only adds the systematics if it's new and has a good value @@ -230,10 +180,16 @@ def add_uncertainty(self, syst, typ, value): if isinstance(syst, str) and isinstance(typ, str): if not syst in self._uncertainties: - if typ == "shape": + if not self._value_rules.is_allowed_type(typ=typ): + return False + if typ is "shape": + tmp = syst + if syst.startswith("#"): + tmp = tmp.replace("#","") + tmp = tmp.strip() print "Looking for varied histograms for systematic", syst keys = self._id_logic.build_systematic_histo_names( - systematic_name = syst, base_key = self._systkey) + systematic_name = tmp, base_key = self._systkey) if not all(self._file_handler.histogram_exists(k) for k in keys): return False if self._value_rules.is_good_systval(value): @@ -255,42 +211,113 @@ def add_uncertainty(self, syst, typ, value): return False - def set_uncertainty(self, systname, typ, value): + def set_uncertainty(self, systematicName, typ, value): """ - set the uncertainty 'systname' for this process to type 'typ' + set the uncertainty 'systematicName' for this process to type 'typ' and value 'value'. 
This function checks - - whether there is an entry for 'systname' in the first place + - whether there is an entry for 'systematicName' in the first place - the given value is suitable for a datacard (see 'is_good_systval') and only adds the systematics if there is an entry and the value is good """ - if systname in self._uncertainties: + if systematicName in self._uncertainties: if self._value_rules.is_good_systval(value): - self._uncertainties[systname]["value"] = str(value) - self._uncertainties[systname]["type"] = typ + self._uncertainties[systematicName]["value"] = str(value) + self._uncertainties[systematicName]["type"] = typ else: - s = "There is no entry for uncertainty %s" % systname + s = "There is no entry for uncertainty %s" % systematicName s += " in process %s! Please add it first" % self.get_name() print s - def get_uncertainty_value(self, systname): + def delete_uncertainty(self,systematicName): + if systematicName in self._uncertainties: + del self._uncertainties[systematicName] + if self._debug>30: + temp= "DEBUG: deleted uncertainty %s in process %s" % (systematicName,self.name) + if not self.category=="": + temp+=" in category %s" % self.category + print "".join(temp) + + + else: + print "ERROR: uncertainty %s not in process %s" % (systematicName,self.name) + + + def delete_uncertainties(self,list_of_systnames): + for systematic in list_of_systnames: + self.delete_uncertainty(systematicName=systematic) + + + def get_uncertainty_value(self, systematicName): """ - return correlation of uncertainty 'systname' with this process. - If there is no entry for 'systname' in this process, the function + return correlation of uncertainty 'systematicName' with this process. 
+ If there is no entry for 'systematicName' in this process, the function returns '-' """ - if systname in self._uncertainties: - return self._uncertainties[systname]["value"] + if systematicName in self._uncertainties: + return self._uncertainties[systematicName]["value"] else: return "-" - def get_uncertainty_type(self, systname): + def get_uncertainty_type(self, systematicName): """ - return type of uncertainty 'systname' in this process. - If there is no entry for 'systname' in this process, the function + return type of uncertainty 'systematicName' in this process. + If there is no entry for 'systematicName' in this process, the function returns '' """ - if systname in self._uncertainties: - return self._uncertainties[systname]["type"] + if systematicName in self._uncertainties: + return self._uncertainties[systematicName]["type"] else: return "" + + def __str__(self): + """ + current setup: print delivers: + - process name + - process yield + - list of nuisance parameters + """ + s = [] + s.append("Process infos:") + s.append("\tname:\t%s" % self.name) + s.append("\tcategory:\t%s" % self.category) + s.append("\trootfile:\t%s" % self._file_handler.filepath) + s.append("\tnominal histname:\t%s" % self._nominalhistname) + s.append("\tsystematic histname:\t%s" % self._systkey) + s.append("\tyield:\t{0}".format(self._eventcount)) + if len(self._uncertainties) != 0: + s.append("\tlist of uncertainties:") + + temp = "\t\t%s" % "uncertainty".ljust(15) + temp += "\t%s" % "type".ljust(10) + temp += "\t%s" % "correlation".ljust(15) + s.append(temp) + s.append("\t\t"+"_"*len(temp.expandtabs())) + for syst in self._uncertainties: + temp = "\t\t%s" % syst.ljust(15) + temp += "\t%s" % self._uncertainties[syst]["type"].ljust(10) + temp += "\t%s" % str(self._uncertainties[syst]["value"]).ljust(15) + s.append(temp) + return "\n".join(s) + + """ + overloaded get, in and for operator to get better access to systematics in + process object: + self[systematicName] + """ + + def 
__getitem__(self, systematicName): + if systematicName in self._uncertainties: + return self._uncertainties[systematicName] + else: + print "ERROR: Process not in Category!" + + def __iter__(self): + all_uncertainties=self._uncertainties + return all_uncertainties.__iter__() + + def __contains__(self, systematicName): + if systematicName in self._uncertainties: + return True + else: + return False diff --git a/src/systematicObject.py b/src/systematicObject.py index c0ac88b..bfde129 100644 --- a/src/systematicObject.py +++ b/src/systematicObject.py @@ -91,7 +91,7 @@ def add_process(self, process, correlation = "-"): cor = self.get_correlation(process = process) if cor == "-": if correlation == "-": - correlation = process.get_uncertainty_value(systname = self._name) + correlation = process.get_uncertainty_value(systematicName = self._name) self.add_process_raw( category_name = process.category, process_name = process.name, value = correlation) @@ -134,7 +134,7 @@ def set_correlation(self, process, value = "-"): process_name = process.name category = process.category if value == "-": - value = process.get_uncertainty_value(systname = self._name) + value = process.get_uncertainty_value(systematicName = self._name) if process_name in self._dic[category]: if self._value_rules.is_good_systval(value): self._dic[category][process_name] = value