inteprolate signal

rkansal47 · Aug 1, 2024 · f909c39 · f909c39
1 parent f80da02
commit f909c39
Show file tree

Hide file tree

Showing 2 changed files with 387 additions and 10 deletions.
diff --git a/src/HHbbVV/postprocessing/InterpolateSignal.ipynb b/src/HHbbVV/postprocessing/InterpolateSignal.ipynb
@@ -0,0 +1,376 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle, json, gzip\n",
+    "import numpy as np\n",
+    "import hist\n",
+    "from hist import Hist\n",
+    "\n",
+    "from typing import Optional, List, Dict\n",
+    "from copy import copy\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "import mplhep as hep\n",
+    "from matplotlib import colors\n",
+    "\n",
+    "from tqdm import tqdm\n",
+    "\n",
+    "from pathlib import Path\n",
+    "import os\n",
+    "\n",
+    "from HHbbVV.hh_vars import years, bg_keys\n",
+    "from HHbbVV.postprocessing import datacardHelpers\n",
+    "from postprocessing import nonres_shape_vars as shape_vars\n",
+    "import plotting\n",
+    "\n",
+    "plt.rcParams.update({\"font.size\": 16})\n",
+    "plt.style.use(hep.style.CMS)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_dir = Path(\"../../plots/Interpolate/24Aug1\")\n",
+    "plot_dir.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "templates_dir = Path(\"templates/24Apr26NonresBDT995AllSigs\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load and process templates"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "templates_dict: dict[str, dict[str, Hist]] = {}\n",
+    "\n",
+    "for year in years:\n",
+    "    with (templates_dir / f\"{year}_templates.pkl\").open(\"rb\") as f:\n",
+    "        templates_dict[year] = datacardHelpers.rem_neg(pickle.load(f))\n",
+    "\n",
+    "templates = datacardHelpers.sum_templates(templates_dict, years)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "vbf_keys = [\n",
+    "    \"VBFHHbbVV\",\n",
+    "    \"qqHH_CV_1_C2V_1_kl_0_HHbbVV\",\n",
+    "    \"qqHH_CV_1_C2V_1_kl_2_HHbbVV\",\n",
+    "    \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\",\n",
+    "    \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\",\n",
+    "    \"qqHH_CV_1p5_C2V_1_kl_1_HHbbVV\",\n",
+    "]\n",
+    "\n",
+    "vbf_hists = {}\n",
+    "\n",
+    "for key, h in templates.items():\n",
+    "    vbf_hists[key] = []\n",
+    "    for vbf_key in vbf_keys:\n",
+    "        vbf_hists[key].append(h[vbf_key, ...])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Interpolation coefficients"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sympy\n",
+    "\n",
+    "csamples = [\n",
+    "    # CV, C2V, kl\n",
+    "    (1.0, 1.0, 1.0),\n",
+    "    (1.0, 1.0, 0.0),\n",
+    "    (1.0, 1.0, 2.0),\n",
+    "    (1.0, 0.0, 1.0),\n",
+    "    (1.0, 2.0, 1.0),\n",
+    "    # (0.5, 1.0, 1.0),\n",
+    "    (1.5, 1.0, 1.0),\n",
+    "]\n",
+    "\n",
+    "M = sympy.Matrix(\n",
+    "    [\n",
+    "        [\n",
+    "            CV**2 * kl**2,\n",
+    "            CV**4,\n",
+    "            C2V**2,\n",
+    "            CV**3 * kl,\n",
+    "            CV * C2V * kl,\n",
+    "            CV**2 * C2V,\n",
+    "        ]\n",
+    "        for i, (CV, C2V, kl) in enumerate(csamples)\n",
+    "    ]\n",
+    ")\n",
+    "\n",
+    "# the vector of couplings\n",
+    "CV, C2V, kl = sympy.symbols(\"CV C2V kl\")\n",
+    "c = sympy.Matrix(\n",
+    "    [\n",
+    "        [CV**2 * kl**2],\n",
+    "        [CV**4],\n",
+    "        [C2V**2],\n",
+    "        [CV**3 * kl],\n",
+    "        [CV * C2V * kl],\n",
+    "        [CV**2 * C2V],\n",
+    "    ]\n",
+    ")\n",
+    "\n",
+    "# the vector of symbolic sample cross sections\n",
+    "s = sympy.Matrix([[sympy.Symbol(\"xs{}\".format(i))] for i in range(len(csamples))])\n",
+    "\n",
+    "# actual computation, i.e., matrix inversion and multiplications with vectors\n",
+    "M_inv = M.pinv()\n",
+    "coeffs = c.transpose() * M_inv\n",
+    "sigma = coeffs * s"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_hist_interp(cv, c2v, Kl, hists):\n",
+    "    sigma_val = sigma.subs({CV: cv, C2V: c2v, kl: Kl})\n",
+    "    counts = []\n",
+    "    errs = []\n",
+    "    for i in range(len(hists[0].values())):\n",
+    "        count = np.array(\n",
+    "            sigma_val.subs(\n",
+    "                {sympy.Symbol(f\"xs{j}\"): hists[j].values()[i] for j in range(len(vbf_keys))}\n",
+    "            )\n",
+    "        )[0][0]\n",
+    "        err = np.array(\n",
+    "            sigma_val.subs(\n",
+    "                {\n",
+    "                    sympy.Symbol(f\"xs{j}\"): np.nan_to_num(\n",
+    "                        np.sqrt(hists[j].variances()[i]) / hists[j].values()[i]\n",
+    "                    )\n",
+    "                    for j in range(len(vbf_keys))\n",
+    "                }\n",
+    "            )\n",
+    "        )[0][0]\n",
+    "\n",
+    "        if count < 1e-12:\n",
+    "            count = 0\n",
+    "\n",
+    "        counts.append(count)\n",
+    "        errs.append(err)\n",
+    "\n",
+    "    return np.array(counts), np.array(errs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Add interpolated signals to templates"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "interp_points = np.arange(-1.0, 3.1, 0.1)\n",
+    "samples = [f\"qqHH_CV_1_C2V_{c:.1f}_kl_1_HHbbVV\" for c in interp_points]\n",
+    "\n",
+    "interp_hists = {}\n",
+    "\n",
+    "for region in [\"passvbf\", \"passggf\", \"fail\"]:\n",
+    "    print(region)\n",
+    "    h = Hist(\n",
+    "        hist.axis.StrCategory(samples, name=\"Sample\"),\n",
+    "        *templates[\"passvbf\"].axes[1:],\n",
+    "        storage=hist.storage.Weight(),\n",
+    "    )\n",
+    "    for i, c in tqdm(enumerate(interp_points)):\n",
+    "        c_h, c_err = get_hist_interp(1.0, c, 1.0, vbf_hists[region])\n",
+    "        h.values()[i, :] = c_h\n",
+    "        h.variances()[i, :] = (c_err * c_h) ** 2\n",
+    "\n",
+    "    interp_hists[region] = h"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ctemplates = {}\n",
+    "\n",
+    "for region in [\"passvbf\", \"passggf\", \"fail\"]:\n",
+    "    template = templates[region]\n",
+    "    # combined sig + bg samples\n",
+    "    csamples = list(template.axes[0]) + samples\n",
+    "\n",
+    "    # new hist with all samples\n",
+    "    ctemplate = Hist(\n",
+    "        hist.axis.StrCategory(csamples, name=\"Sample\"),\n",
+    "        *template.axes[1:],\n",
+    "        storage=\"weight\",\n",
+    "    )\n",
+    "\n",
+    "    # add background hists\n",
+    "    for sample in template.axes[0]:\n",
+    "        sample_key_index = np.where(np.array(list(ctemplate.axes[0])) == sample)[0][0]\n",
+    "        ctemplate.view(flow=True)[sample_key_index, ...] = template[sample, ...].view(flow=True)\n",
+    "\n",
+    "    # add signal hists\n",
+    "    for sample in samples:\n",
+    "        sample_key_index = np.where(np.array(list(ctemplate.axes[0])) == sample)[0][0]\n",
+    "        ctemplate.view(flow=True)[sample_key_index, ...] = interp_hists[region][sample, ...].view(\n",
+    "            flow=True\n",
+    "        )\n",
+    "\n",
+    "    ctemplates[region] = ctemplate"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Plot"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from hist.intervals import poisson_interval\n",
+    "\n",
+    "(\n",
+    "    poisson_interval(ctemplates[region][\"Data\", ...].values())\n",
+    "    - ctemplates[region][\"Data\", ...].values()\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ctemplates[\"passvbf\"][\"qqHH_CV_1_C2V_0.6_kl_1_HHbbVV\", ...].values()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "selection_regions = {\n",
+    "    \"passvbf\": \"VBF\",\n",
+    "    \"passggf\": \"ggF\",\n",
+    "    # \"fail\": \"Fail\",\n",
+    "}\n",
+    "\n",
+    "ylims = {\"passggf\": 200, \"passvbf\": 100, \"fail\": 7e5}\n",
+    "\n",
+    "sig_scale_dict = {\n",
+    "    # \"HHbbVV\": 100,\n",
+    "    # \"VBFHHbbVV\": 2000,\n",
+    "    \"qqHH_CV_1_C2V_1.6_kl_1_HHbbVV\": 1,\n",
+    "    \"qqHH_CV_1_C2V_0.6_kl_1_HHbbVV\": 1,\n",
+    "    \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\": 1,\n",
+    "    \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\": 1,\n",
+    "}\n",
+    "\n",
+    "for region, region_label in selection_regions.items():\n",
+    "    pass_region = region.startswith(\"pass\")\n",
+    "    for i, shape_var in enumerate(shape_vars):\n",
+    "        plot_params = {\n",
+    "            \"hists\": ctemplates[region],\n",
+    "            \"sig_keys\": list(sig_scale_dict.keys()),\n",
+    "            \"bg_keys\": [],\n",
+    "            \"sig_scale_dict\": sig_scale_dict if pass_region else None,\n",
+    "            \"show\": True,\n",
+    "            \"year\": \"all\",\n",
+    "            \"ylim\": ylims[region],\n",
+    "            \"title\": f\"Pre-fit {region_label} Region\",\n",
+    "            \"name\": f\"{plot_dir}/interp_{region}_{shape_var.var}_signal_log.pdf\",\n",
+    "            \"ncol\": 2,  # if region == \"passvbf\" else 1,\n",
+    "            \"ratio_ylims\": [0, 5] if region == \"passvbf\" else [0, 2],\n",
+    "            \"cmslabel\": \"Preliminary\",\n",
+    "            \"plot_data\": False,\n",
+    "            \"log\": True,\n",
+    "        }\n",
+    "\n",
+    "        plotting.ratioHistPlot(**plot_params, data_err=True)\n",
+    "\n",
+    "#     break\n",
+    "# break"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python310",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}