Skip to content

Commit

Permalink
inteprolate signal
Browse files Browse the repository at this point in the history
  • Loading branch information
rkansal47 committed Aug 1, 2024
1 parent f80da02 commit f909c39
Show file tree
Hide file tree
Showing 2 changed files with 387 additions and 10 deletions.
376 changes: 376 additions & 0 deletions src/HHbbVV/postprocessing/InterpolateSignal.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,376 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pickle, json, gzip\n",
"import numpy as np\n",
"import hist\n",
"from hist import Hist\n",
"\n",
"from typing import Optional, List, Dict\n",
"from copy import copy\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import mplhep as hep\n",
"from matplotlib import colors\n",
"\n",
"from tqdm import tqdm\n",
"\n",
"from pathlib import Path\n",
"import os\n",
"\n",
"from HHbbVV.hh_vars import years, bg_keys\n",
"from HHbbVV.postprocessing import datacardHelpers\n",
"from postprocessing import nonres_shape_vars as shape_vars\n",
"import plotting\n",
"\n",
"plt.rcParams.update({\"font.size\": 16})\n",
"plt.style.use(hep.style.CMS)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plot_dir = Path(\"../../plots/Interpolate/24Aug1\")\n",
"plot_dir.mkdir(parents=True, exist_ok=True)\n",
"\n",
"templates_dir = Path(\"templates/24Apr26NonresBDT995AllSigs\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load and process templates"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"templates_dict: dict[str, dict[str, Hist]] = {}\n",
"\n",
"for year in years:\n",
" with (templates_dir / f\"{year}_templates.pkl\").open(\"rb\") as f:\n",
" templates_dict[year] = datacardHelpers.rem_neg(pickle.load(f))\n",
"\n",
"templates = datacardHelpers.sum_templates(templates_dict, years)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"vbf_keys = [\n",
" \"VBFHHbbVV\",\n",
" \"qqHH_CV_1_C2V_1_kl_0_HHbbVV\",\n",
" \"qqHH_CV_1_C2V_1_kl_2_HHbbVV\",\n",
" \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\",\n",
" \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\",\n",
" \"qqHH_CV_1p5_C2V_1_kl_1_HHbbVV\",\n",
"]\n",
"\n",
"vbf_hists = {}\n",
"\n",
"for key, h in templates.items():\n",
" vbf_hists[key] = []\n",
" for vbf_key in vbf_keys:\n",
" vbf_hists[key].append(h[vbf_key, ...])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Interpolation coefficients"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sympy\n",
"\n",
"csamples = [\n",
" # CV, C2V, kl\n",
" (1.0, 1.0, 1.0),\n",
" (1.0, 1.0, 0.0),\n",
" (1.0, 1.0, 2.0),\n",
" (1.0, 0.0, 1.0),\n",
" (1.0, 2.0, 1.0),\n",
" # (0.5, 1.0, 1.0),\n",
" (1.5, 1.0, 1.0),\n",
"]\n",
"\n",
"M = sympy.Matrix(\n",
" [\n",
" [\n",
" CV**2 * kl**2,\n",
" CV**4,\n",
" C2V**2,\n",
" CV**3 * kl,\n",
" CV * C2V * kl,\n",
" CV**2 * C2V,\n",
" ]\n",
" for i, (CV, C2V, kl) in enumerate(csamples)\n",
" ]\n",
")\n",
"\n",
"# the vector of couplings\n",
"CV, C2V, kl = sympy.symbols(\"CV C2V kl\")\n",
"c = sympy.Matrix(\n",
" [\n",
" [CV**2 * kl**2],\n",
" [CV**4],\n",
" [C2V**2],\n",
" [CV**3 * kl],\n",
" [CV * C2V * kl],\n",
" [CV**2 * C2V],\n",
" ]\n",
")\n",
"\n",
"# the vector of symbolic sample cross sections\n",
"s = sympy.Matrix([[sympy.Symbol(\"xs{}\".format(i))] for i in range(len(csamples))])\n",
"\n",
"# actual computation, i.e., matrix inversion and multiplications with vectors\n",
"M_inv = M.pinv()\n",
"coeffs = c.transpose() * M_inv\n",
"sigma = coeffs * s"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_hist_interp(cv, c2v, Kl, hists):\n",
" sigma_val = sigma.subs({CV: cv, C2V: c2v, kl: Kl})\n",
" counts = []\n",
" errs = []\n",
" for i in range(len(hists[0].values())):\n",
" count = np.array(\n",
" sigma_val.subs(\n",
" {sympy.Symbol(f\"xs{j}\"): hists[j].values()[i] for j in range(len(vbf_keys))}\n",
" )\n",
" )[0][0]\n",
" err = np.array(\n",
" sigma_val.subs(\n",
" {\n",
" sympy.Symbol(f\"xs{j}\"): np.nan_to_num(\n",
" np.sqrt(hists[j].variances()[i]) / hists[j].values()[i]\n",
" )\n",
" for j in range(len(vbf_keys))\n",
" }\n",
" )\n",
" )[0][0]\n",
"\n",
" if count < 1e-12:\n",
" count = 0\n",
"\n",
" counts.append(count)\n",
" errs.append(err)\n",
"\n",
" return np.array(counts), np.array(errs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Add interpolated signals to templates"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"interp_points = np.arange(-1.0, 3.1, 0.1)\n",
"samples = [f\"qqHH_CV_1_C2V_{c:.1f}_kl_1_HHbbVV\" for c in interp_points]\n",
"\n",
"interp_hists = {}\n",
"\n",
"for region in [\"passvbf\", \"passggf\", \"fail\"]:\n",
" print(region)\n",
" h = Hist(\n",
" hist.axis.StrCategory(samples, name=\"Sample\"),\n",
" *templates[\"passvbf\"].axes[1:],\n",
" storage=hist.storage.Weight(),\n",
" )\n",
" for i, c in tqdm(enumerate(interp_points)):\n",
" c_h, c_err = get_hist_interp(1.0, c, 1.0, vbf_hists[region])\n",
" h.values()[i, :] = c_h\n",
" h.variances()[i, :] = (c_err * c_h) ** 2\n",
"\n",
" interp_hists[region] = h"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ctemplates = {}\n",
"\n",
"for region in [\"passvbf\", \"passggf\", \"fail\"]:\n",
" template = templates[region]\n",
" # combined sig + bg samples\n",
" csamples = list(template.axes[0]) + samples\n",
"\n",
" # new hist with all samples\n",
" ctemplate = Hist(\n",
" hist.axis.StrCategory(csamples, name=\"Sample\"),\n",
" *template.axes[1:],\n",
" storage=\"weight\",\n",
" )\n",
"\n",
" # add background hists\n",
" for sample in template.axes[0]:\n",
" sample_key_index = np.where(np.array(list(ctemplate.axes[0])) == sample)[0][0]\n",
" ctemplate.view(flow=True)[sample_key_index, ...] = template[sample, ...].view(flow=True)\n",
"\n",
" # add signal hists\n",
" for sample in samples:\n",
" sample_key_index = np.where(np.array(list(ctemplate.axes[0])) == sample)[0][0]\n",
" ctemplate.view(flow=True)[sample_key_index, ...] = interp_hists[region][sample, ...].view(\n",
" flow=True\n",
" )\n",
"\n",
" ctemplates[region] = ctemplate"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from hist.intervals import poisson_interval\n",
"\n",
"(\n",
" poisson_interval(ctemplates[region][\"Data\", ...].values())\n",
" - ctemplates[region][\"Data\", ...].values()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ctemplates[\"passvbf\"][\"qqHH_CV_1_C2V_0.6_kl_1_HHbbVV\", ...].values()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"selection_regions = {\n",
" \"passvbf\": \"VBF\",\n",
" \"passggf\": \"ggF\",\n",
" # \"fail\": \"Fail\",\n",
"}\n",
"\n",
"ylims = {\"passggf\": 200, \"passvbf\": 100, \"fail\": 7e5}\n",
"\n",
"sig_scale_dict = {\n",
" # \"HHbbVV\": 100,\n",
" # \"VBFHHbbVV\": 2000,\n",
" \"qqHH_CV_1_C2V_1.6_kl_1_HHbbVV\": 1,\n",
" \"qqHH_CV_1_C2V_0.6_kl_1_HHbbVV\": 1,\n",
" \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\": 1,\n",
" \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\": 1,\n",
"}\n",
"\n",
"for region, region_label in selection_regions.items():\n",
" pass_region = region.startswith(\"pass\")\n",
" for i, shape_var in enumerate(shape_vars):\n",
" plot_params = {\n",
" \"hists\": ctemplates[region],\n",
" \"sig_keys\": list(sig_scale_dict.keys()),\n",
" \"bg_keys\": [],\n",
" \"sig_scale_dict\": sig_scale_dict if pass_region else None,\n",
" \"show\": True,\n",
" \"year\": \"all\",\n",
" \"ylim\": ylims[region],\n",
" \"title\": f\"Pre-fit {region_label} Region\",\n",
" \"name\": f\"{plot_dir}/interp_{region}_{shape_var.var}_signal_log.pdf\",\n",
" \"ncol\": 2, # if region == \"passvbf\" else 1,\n",
" \"ratio_ylims\": [0, 5] if region == \"passvbf\" else [0, 2],\n",
" \"cmslabel\": \"Preliminary\",\n",
" \"plot_data\": False,\n",
" \"log\": True,\n",
" }\n",
"\n",
" plotting.ratioHistPlot(**plot_params, data_err=True)\n",
"\n",
"# break\n",
"# break"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "python310",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit f909c39

Please sign in to comment.