Skip to content

Commit

Permalink
significance plots
Browse files Browse the repository at this point in the history
  • Loading branch information
rkansal47 committed Sep 4, 2023
1 parent 68dd573 commit 6e765c9
Show file tree
Hide file tree
Showing 4 changed files with 174 additions and 131 deletions.
71 changes: 29 additions & 42 deletions src/HHbbVV/postprocessing/PostProcessVBF.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"import corrections\n",
"from collections import OrderedDict\n",
"\n",
"from utils import CUT_MAX_VAL\n",
"from utils import CUT_MAX_VAL, ShapeVar\n",
"from hh_vars import (\n",
" years,\n",
" data_key,\n",
Expand Down Expand Up @@ -150,46 +150,32 @@
"outputs": [],
"source": [
"# {var: (bins, label)}\n",
"control_plot_vars = {\n",
" # \"MET_pt\": ([50, 0, 300], r\"$p^{miss}_T$ (GeV)\"),\n",
" \"DijetEta\": ([30, -8, 8], r\"$\\eta^{jj}$\"),\n",
" \"DijetPt\": ([30, 0, 750], r\"$p_T^{jj}$ (GeV)\"),\n",
" \"DijetMass\": (\n",
" # list(range(800, 1400, 100)) + [1400, 1600, 2000, 3000, 4400],\n",
" [30, 600, 4500],\n",
" r\"$m^{jj}$ (GeV)\",\n",
" ),\n",
" \"bbFatJetEta\": ([30, -2.4, 2.4], r\"$\\eta^{bb}$\"),\n",
" \"bbFatJetPt\": ([30, 300, 1500], r\"$p^{bb}_T$ (GeV)\"),\n",
" \"bbFatJetParticleNetMass\": ([20, 50, 250], r\"$m^{bb}_{reg}$ (GeV)\"),\n",
" # \"bbFatJetMsd\": ([50, 0, 300], r\"$m^{bb}_{msd}$ (GeV)\"),\n",
" # \"bbFatJetParticleNetMD_Txbb\": ([50, 0.8, 1], r\"$T^{bb}_{Xbb}$\"),\n",
" \"VVFatJetEta\": ([30, -2.4, 2.4], r\"$\\eta^{VV}$\"),\n",
" \"VVFatJetPt\": ([30, 300, 1500], r\"$p^{VV}_T$ (GeV)\"),\n",
" \"VVFatJetParticleNetMass\": (\n",
" # list(range(50, 110, 10)) + list(range(110, 200, 15)) + [200, 220, 250],\n",
" [20, 50, 250],\n",
" r\"$m^{VV}_{reg}$ (GeV)\",\n",
" ),\n",
" # \"VVFatJetMsd\": ([40, 50, 250], r\"$m^{VV}_{msd}$ (GeV)\"),\n",
" # \"VVFatJetParticleNet_Th4q\": ([50, 0, 1], r\"Prob($H \\to 4q$) vs Prob(QCD) (Non-MD)\"),\n",
" # \"VVFatJetParTMD_THWW4q\": (\n",
" # [50, 0, 1],\n",
" # r\"Prob($H \\to VV \\to 4q$) vs Prob(QCD) (Mass-Decorrelated)\",\n",
" # ),\n",
" # \"VVFatJetParTMD_probT\": ([50, 0, 1], r\"Prob(Top) (Mass-Decorrelated)\"),\n",
" # \"VVFatJetParTMD_THWWvsT\": (\n",
" # [50, 0, 1],\n",
" # r\"$T^{VV}_{HWW}$\",\n",
" # ),\n",
" # \"bbFatJetPtOverDijetPt\": ([50, 0, 40], r\"$p^{bb}_T / p_T^{jj}$\"),\n",
" # \"VVFatJetPtOverDijetPt\": ([50, 0, 40], r\"$p^{VV}_T / p_T^{jj}$\"),\n",
" # \"VVFatJetPtOverbbFatJetPt\": ([50, 0.4, 2.0], r\"$p^{VV}_T / p^{bb}_T$\"),\n",
" # \"nGoodMuons\": ([3, 0, 3], r\"# of Muons\"),\n",
" # \"nGoodElectrons\": ([3, 0, 3], r\"# of Electrons\"),\n",
" # \"nGoodJets\": ([5, 0, 5], r\"# of AK4 B-Jets\"),\n",
" # \"BDTScore\": ([50, 0, 1], r\"BDT Score\"),\n",
"}\n",
"control_plot_vars = [\n",
" # ShapeVar(var=\"MET_pt\", label=r\"$p^{miss}_T$ (GeV)\", bins=[50, 0, 300]),\n",
" # ShapeVar(var=\"DijetEta\", label=r\"$\\eta^{jj}$\", bins=[30, -8, 8]),\n",
" # ShapeVar(var=\"DijetPt\", label=r\"$p_T^{jj}$ (GeV)\", bins=[30, 0, 750]),\n",
" # ShapeVar(var=\"DijetMass\", label=r\"$m^{jj}$ (GeV)\", bins=[30, 600, 4000]),\n",
" # ShapeVar(var=\"bbFatJetEta\", label=r\"$\\eta^{bb}$\", bins=[30, -2.4, 2.4]),\n",
" ShapeVar(var=\"bbFatJetPt\", label=r\"$p^{bb}_T$ (GeV)\", bins=[30, 300, 1500], significance_dir=\"right\"),\n",
" ShapeVar(var=\"bbFatJetParticleNetMass\", label=r\"$m^{bb}_{reg}$ (GeV)\", bins=[20, 50, 250], significance_dir=\"bin\"),\n",
" # ShapeVar(var=\"bbFatJetMsd\", label=r\"$m^{bb}_{msd}$ (GeV)\", bins=[50, 0, 300]),\n",
" # ShapeVar(var=\"bbFatJetParticleNetMD_Txbb\", label=r\"$T^{bb}_{Xbb}$\", bins=[50, 0.8, 1]),\n",
" # ShapeVar(var=\"VVFatJetEta\", label=r\"$\\eta^{VV}$\", bins=[30, -2.4, 2.4]),\n",
" # ShapeVar(var=\"VVFatJetPt\", label=r\"$p^{VV}_T$ (GeV)\", bins=[30, 300, 1500]),\n",
" # ShapeVar(var=\"VVParticleNetMass\", label=r\"$m^{VV}_{reg}$ (GeV)\", bins=[20, 50, 250]),\n",
" # ShapeVar(var=\"VVFatJetMsd\", label=r\"$m^{VV}_{msd}$ (GeV)\", bins=[40, 50, 250]),\n",
" # ShapeVar(var=\"VVFatJetParticleNet_Th4q\", label=r\"Prob($H \\to 4q$) vs Prob(QCD) (Non-MD)\", bins=[50, 0, 1]),\n",
" # ShapeVar(var=\"VVFatJetParTMD_THWW4q\", label=r\"Prob($H \\to VV \\to 4q$) vs Prob(QCD) (Mass-Decorrelated)\", bins=[50, 0, 1]),\n",
" # ShapeVar(var=\"VVFatJetParTMD_probT\", label=r\"Prob(Top) (Mass-Decorrelated)\", bins=[50, 0, 1]),\n",
" # ShapeVar(var=\"VVFatJetParTMD_THWWvsT\", label=r\"$T^{VV}_{HWW}$\", bins=[50, 0, 1]),\n",
" # ShapeVar(var=\"bbFatJetPtOverDijetPt\", label=r\"$p^{bb}_T / p_T^{jj}$\", bins=[50, 0, 40]),\n",
" # ShapeVar(var=\"VVFatJetPtOverDijetPt\", label=r\"$p^{VV}_T / p_T^{jj}$\", bins=[50, 0, 40]),\n",
" # ShapeVar(var=\"VVFatJetPtOverbbFatJetPt\", label=r\"$p^{VV}_T / p^{bb}_T$\", bins=[50, 0.4, 2.0]),\n",
" # ShapeVar(var=\"nGoodMuons\", label=r\"# of Muons\", bins=[3, 0, 3]),\n",
" # ShapeVar(var=\"nGoodElectrons\", label=r\"# of Electrons\", bins=[3, 0, 3]),\n",
" # ShapeVar(var=\"nGoodJets\", label=r\"# of AK4 B-Jets\", bins=[5, 0, 5]),\n",
" # ShapeVar(var=\"BDTScore\", label=r\"BDT Score\", bins=[50, 0, 1]),\n",
"]\n",
"\n",
"hists = postprocessing.control_plots(\n",
" events_dict,\n",
Expand All @@ -206,8 +192,9 @@
" \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\": 2e3,\n",
" \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\": 2e3,\n",
" },\n",
" plot_significance=True,\n",
" show=True,\n",
" log=\"both\",\n",
" log=False,\n",
")"
]
},
Expand Down
63 changes: 59 additions & 4 deletions src/HHbbVV/postprocessing/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,14 @@ def _fill_error(ax, edges, down, up, scale=1):
)


def _asimov_significance(s, b):
"""Asimov estimate of discovery significance (with no systematic uncertainties).
See e.g. https://www.pp.rhul.ac.uk/~cowan/atlas/cowan_atlas_15feb11.pdf.
Or for more explanation: https://www.pp.rhul.ac.uk/~cowan/stat/cowan_munich16.pdf
"""
return np.sqrt(2 * ((s + b) * np.log(1 + (s / b)) - s))


def ratioHistPlot(
hists: Hist,
year: str,
Expand All @@ -210,6 +218,8 @@ def ratioHistPlot(
log: bool = False,
ratio_ylims: List[float] = [0, 2],
divide_bin_width: bool = False,
plot_significance: bool = False,
significance_dir: str = "right",
axrax: Tuple = None,
):
"""
Expand Down Expand Up @@ -243,6 +253,8 @@ def ratioHistPlot(
bg_order (List[str]): order in which to plot backgrounds
ratio_ylims (List[float]): y limits on the ratio plots
divide_bin_width (bool): divide yields by the bin width (for resonant fit regions)
plot_significance (bool): plot Asimov significance below ratio plot
significance_dir (str): "Direction" for significance. i.e. a > cut ("right"), a < cut ("left"), or per-bin ("bin").
axrax (Tuple): optionally input ax and rax instead of creating new ones
"""
# copy hists and bg_keys so input objects are not changed
Expand All @@ -258,13 +270,20 @@ def ratioHistPlot(
hists, data_err = _divide_bin_widths(hists, data_err)

# set up plots
if axrax is None:
if axrax is not None:
if plot_significance:
raise RuntimeError("Significance plots with input axes not implemented yet.")

ax, rax = axrax
ax.sharex(rax)
elif plot_significance:
fig, (ax, rax, sax) = plt.subplots(
3, 1, figsize=(12, 18), gridspec_kw=dict(height_ratios=[3, 1, 1], hspace=0), sharex=True
)
else:
fig, (ax, rax) = plt.subplots(
2, 1, figsize=(12, 14), gridspec_kw=dict(height_ratios=[3, 1], hspace=0), sharex=True
)
else:
ax, rax = axrax
ax.sharex(rax)

# plot histograms
y_label = r"Events / Bin Width (GeV$^{-1}$)" if divide_bin_width else "Events"
Expand Down Expand Up @@ -358,6 +377,42 @@ def ratioHistPlot(
rax.set_ylim(ratio_ylims)
rax.grid()

if plot_significance:
bg_tot = sum([pre_divide_hists[sample, :] for sample in bg_keys]).values()
sigs = [pre_divide_hists[sig_key, :].values() for sig_key in sig_scale_dict]

if significance_dir == "left":
bg_tot = np.cumsum(bg_tot[::-1])[::-1]
sigs = [np.cumsum(sig[::-1])[::-1] for sig in sigs]
sax.set_ylabel(r"Asimov Sign. for $\leq$ Cuts")
elif significance_dir == "right":
bg_tot = np.cumsum(bg_tot)
sigs = [np.cumsum(sig) for sig in sigs]
sax.set_ylabel(r"Asimov Sign. for $\geq$ Cuts")
elif significance_dir == "bin":
sax.set_ylabel("Asimov Sign. per Bin")
else:
raise RuntimeError(
'Invalid value for ``significance_dir``. Options are ["left", "right", "bin"].'
)

edges = pre_divide_hists.axes[1].edges
hep.histplot(
[(_asimov_significance(sig, bg_tot), edges) for sig in sigs],
ax=sax,
histtype="step",
label=[
sig_key if sig_key not in sample_label_map else sample_label_map[sig_key]
for sig_key in sig_scale_dict
],
color=sig_colours[: len(sig_keys)],
)

sax.legend(fontsize=12)
sax.set_yscale("log")
sax.set_ylim([1e-7, 10])
sax.set_xlabel(hists.axes[1].label)

if title is not None:
ax.set_title(title, y=1.08)

Expand Down
117 changes: 45 additions & 72 deletions src/HHbbVV/postprocessing/postprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
jec_shifts,
jmsr_shifts,
)
from utils import CUT_MAX_VAL
from utils import CUT_MAX_VAL, ShapeVar

from pprint import pprint
from copy import deepcopy
Expand All @@ -62,36 +62,6 @@
# warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)


class ShapeVar:
"""Class to store attributes of the variable to be fit on.
Args:
var (str): variable name
label (str): variable label
bins (List[int]): bins
reg (bool, optional): Use a regular axis or variable binning. Defaults to True.
blind_window (List[int], optional): if blinding . Defaults to None.
"""

def __init__(
self,
var: str,
label: str,
bins: List[int],
reg: bool = True,
blind_window: List[int] = None,
):
self.var = var
self.label = label
self.blind_window = blind_window

# create axis used for histogramming
if reg:
self.axis = hist.axis.Regular(*bins, name=var, label=label)
else:
self.axis = hist.axis.Variable(bins, name=var, label=label)


@dataclass
class Syst:
samples: List[str] = None
Expand Down Expand Up @@ -123,36 +93,39 @@ class Region:
]

# {var: (bins, label)}
control_plot_vars = {
"MET_pt": ([40, 0, 320], r"$p^{miss}_T$ (GeV)"),
# "DijetEta": ([50, -8, 8], r"$\eta^{jj}$"),
# "DijetPt": ([50, 0, 750], r"$p_T^{jj}$ (GeV)"),
# "DijetMass": ([50, 500, 3000], r"$m^{jj}$ (GeV)"),
"bbFatJetPhi": ([40, -3.5, 3.5], r"$\varphi^{bb}$"),
"bbFatJetEta": ([40, -3, 3], r"$\eta^{bb}$"),
"bbFatJetPt": ([40, 300, 2300], r"$p^{bb}_T$ (GeV)"), # TODO: increase bin widths, x max
# "bbFatJetParticleNetMass": ([50, 0, 300], r"$m^{bb}_{reg}$ (GeV)"),
# "bbFatJetMsd": ([50, 0, 300], r"$m^{bb}_{msd}$ (GeV)"),
# "bbFatJetParticleNetMD_Txbb": ([50, 0.8, 1], r"$p^{bb}_{Txbb}$"),
"VVFatJetPhi": ([40, -3.5, 3.5], r"$\varphi^{VV}$"),
"VVFatJetEta": ([40, -3, 3], r"$\eta^{VV}$"),
"VVFatJetPt": ([40, 300, 2300], r"$p^{VV}_T$ (GeV)"),
# "VVFatJetParticleNetMass": ([50, 0, 300], r"$m^{VV}_{reg}$ (GeV)"),
# "VVFatJetMsd": ([50, 0, 300], r"$m^{VV}_{msd}$ (GeV)"),
# "VVFatJetParticleNet_Th4q": ([50, 0, 1], r"Prob($H \to 4q$) vs Prob(QCD) (Non-MD)"),
# "VVFatJetParTMD_THWW4q": (
# [50, 0, 1],
# r"Prob($H \to VV \to 4q$) vs Prob(QCD) (Mass-Decorrelated)",
# ),
# "VVFatJetParTMD_probT": ([50, 0, 1], r"Prob(Top) (Mass-Decorrelated)"),
# "bbFatJetPtOverDijetPt": ([50, 0, 40], r"$p^{bb}_T / p_T^{jj}$"),
# "VVFatJetPtOverDijetPt": ([50, 0, 40], r"$p^{VV}_T / p_T^{jj}$"),
# "VVFatJetPtOverbbFatJetPt": ([50, 0.4, 2.0], r"$p^{VV}_T / p^{bb}_T$"),
# "nGoodMuons": ([3, 0, 3], r"# of Muons"),
# "nGoodElectrons": ([3, 0, 3], r"# of Electrons"),
# "nGoodJets": ([5, 0, 5], r"# of AK4 B-Jets"),
# "BDTScore": ([50, 0, 1], r"BDT Score"),
}
control_plot_vars = [
ShapeVar(var="MET_pt", label=r"$p^{miss}_T$ (GeV)", bins=[50, 0, 300]),
ShapeVar(var="DijetEta", label=r"$\eta^{jj}$", bins=[30, -8, 8]),
ShapeVar(var="DijetPt", label=r"$p_T^{jj}$ (GeV)", bins=[30, 0, 750]),
ShapeVar(var="DijetMass", label=r"$m^{jj}$ (GeV)", bins=[30, 600, 4000]),
ShapeVar(var="bbFatJetEta", label=r"$\eta^{bb}$", bins=[30, -2.4, 2.4]),
ShapeVar(
var="bbFatJetPt", label=r"$p^{bb}_T$ (GeV)", bins=[30, 300, 1500], significance_dir="right"
),
ShapeVar(
var="bbFatJetParticleNetMass",
label=r"$m^{bb}_{reg}$ (GeV)",
bins=[20, 50, 250],
significance_dir="bin",
),
ShapeVar(var="bbFatJetMsd", label=r"$m^{bb}_{msd}$ (GeV)", bins=[50, 0, 300]),
ShapeVar(var="bbFatJetParticleNetMD_Txbb", label=r"$T^{bb}_{Xbb}$", bins=[50, 0.8, 1]),
ShapeVar(var="VVFatJetEta", label=r"$\eta^{VV}$", bins=[30, -2.4, 2.4]),
ShapeVar(var="VVFatJetPt", label=r"$p^{VV}_T$ (GeV)", bins=[30, 300, 1500]),
ShapeVar(var="VVParticleNetMass", label=r"$m^{VV}_{reg}$ (GeV)", bins=[20, 50, 250]),
ShapeVar(var="VVFatJetMsd", label=r"$m^{VV}_{msd}$ (GeV)", bins=[40, 50, 250]),
# ShapeVar(var="VVFatJetParticleNet_Th4q", label=r"Prob($H \to 4q$) vs Prob(QCD) (Non-MD)", bins=[50, 0, 1]),
# ShapeVar(var="VVFatJetParTMD_THWW4q", label=r"Prob($H \to VV \to 4q$) vs Prob(QCD) (Mass-Decorrelated)", bins=[50, 0, 1]),
# ShapeVar(var="VVFatJetParTMD_probT", label=r"Prob(Top) (Mass-Decorrelated)", bins=[50, 0, 1]),
ShapeVar(var="VVFatJetParTMD_THWWvsT", label=r"$T^{VV}_{HWW}$", bins=[50, 0, 1]),
ShapeVar(var="bbFatJetPtOverDijetPt", label=r"$p^{bb}_T / p_T^{jj}$", bins=[50, 0, 40]),
ShapeVar(var="VVFatJetPtOverDijetPt", label=r"$p^{VV}_T / p_T^{jj}$", bins=[50, 0, 40]),
ShapeVar(var="VVFatJetPtOverbbFatJetPt", label=r"$p^{VV}_T / p^{bb}_T$", bins=[50, 0.4, 2.0]),
# ShapeVar(var="nGoodMuons", label=r"# of Muons", bins=[3, 0, 3]),
# ShapeVar(var="nGoodElectrons", label=r"# of Electrons", bins=[3, 0, 3]),
# ShapeVar(var="nGoodJets", label=r"# of AK4 B-Jets", bins=[5, 0, 5]),
# ShapeVar(var="BDTScore", label=r"BDT Score", bins=[50, 0, 1]),
]


def get_nonres_selection_regions(
Expand Down Expand Up @@ -1002,7 +975,7 @@ def control_plots(
events_dict: Dict[str, pd.DataFrame],
bb_masks: Dict[str, pd.DataFrame],
sig_keys: List[str],
control_plot_vars: Dict[str, Tuple],
control_plot_vars: List[ShapeVar],
plot_dir: str,
year: str,
weight_key: str = "finalWeight",
Expand All @@ -1014,6 +987,7 @@ def control_plots(
sig_scale_dict: Dict[str, float] = None,
combine_pdf: bool = True,
HEM2d: bool = False,
plot_significance: bool = False,
show: bool = False,
log: Tuple[bool, str] = "both",
):
Expand All @@ -1035,17 +1009,14 @@ def control_plots(

if sig_scale_dict is None:
sig_scale_dict = {sig_key: 2e5 for sig_key in sig_keys}
# sig_scale_dict["HHbbVV"] = 2e5

# print(f"{sig_scale_dict = }")

print(control_plot_vars)
print(selection)

for var, (bins, label) in control_plot_vars.items():
if var not in hists:
hists[var] = utils.singleVarHist(
events_dict, var, bins, label, bb_masks, weight_key=weight_key, selection=selection
for shape_var in control_plot_vars:
if shape_var.var not in hists:
hists[shape_var.var] = utils.singleVarHist(
events_dict, shape_var, bb_masks, weight_key=weight_key, selection=selection
)

if HEM2d and year == "2018":
Expand All @@ -1069,15 +1040,17 @@ def control_plots(

merger_control_plots = PdfMerger()

for var, var_hist in hists.items():
name = f"{tplot_dir}/{cutstr}{var}{logstr}.pdf"
for shape_var in control_plot_vars:
name = f"{tplot_dir}/{cutstr}{shape_var.var}{logstr}.pdf"
plotting.ratioHistPlot(
var_hist,
hists[shape_var.var],
year,
plot_sig_keys,
bg_keys,
name=name,
sig_scale_dict=tsig_scale_dict if not log else None,
plot_significance=plot_significance,
significance_dir=shape_var.significance_dir,
show=show,
log=log,
ylim=None if not log else 1e15,
Expand Down
Loading

0 comments on commit 6e765c9

Please sign in to comment.