Commit

update combine setup
rkansal47 committed Mar 29, 2024
1 parent 3442efc commit 6a78941
Showing 3 changed files with 170 additions and 29 deletions.
5 changes: 4 additions & 1 deletion README.md
@@ -407,11 +407,14 @@ python PlotFits.py --fit-file "cards/test_tied_stats/fitDiagnosticsBlindedBkgOnl

### CMSSW + Combine Quickstart

**Warning: this should be done outside of your conda/mamba environment!**

```bash
source /cvmfs/cms.cern.ch/cmsset_default.sh
cmsrel CMSSW_11_3_4
cd CMSSW_11_3_4/src
cmsenv
# git clone -b regex-float-parameters https://github.com/rkansal47/HiggsAnalysis-CombinedLimit.git HiggsAnalysis/CombinedLimit
# git clone -b main https://github.com/rkansal47/HiggsAnalysis-CombinedLimit.git HiggsAnalysis/CombinedLimit
# the float-regex PR has been merged upstream, so we can use the official release now:
git clone -b v9.2.0 https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit.git HiggsAnalysis/CombinedLimit
git clone -b v2.0.0 https://github.com/cms-analysis/CombineHarvester.git CombineHarvester
Expand Down
107 changes: 107 additions & 0 deletions src/HHbbVV/corrections/checkJMR.ipynb
@@ -0,0 +1,107 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.ticker as mticker\n",
"import mplhep as hep\n",
"\n",
"plt.style.use(hep.style.CMS)\n",
"hep.style.use(\"CMS\")\n",
"formatter = mticker.ScalarFormatter(useMathText=True)\n",
"formatter.set_powerlimits((-3, 3))\n",
"plt.rcParams.update({\"font.size\": 24})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Smearing using TWiki formula"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rng = np.random.default_rng()\n",
"N = 100_000\n",
"x = rng.normal(125, 11.4, N)\n",
"\n",
"jmr = 1.03\n",
"jmr_up = 1.07\n",
"\n",
"smearing = np.random.normal(scale=0.09, size=N)\n",
"# gaussian with\n",
"jmr_nom = (smearing * max(np.sqrt(jmr**2 - 1), 0)) + 1\n",
"jmr_up = (smearing * max(np.sqrt(jmr_up**2 - 1), 0)) + 1\n",
"\n",
"plt.hist(x, bins=np.linspace(100, 150, 20), histtype=\"step\", label=r\"$N(\\mu=0, \\sigma=1)$\")\n",
"plt.hist(x * jmr_nom, bins=np.linspace(100, 150, 20), histtype=\"step\", label=r\"1.03 smearing\")\n",
"plt.hist(x * jmr_up, bins=np.linspace(100, 150, 20), histtype=\"step\", label=r\"1.07 smearing\")\n",
"plt.legend()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Smearing using current Gaussian method"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rng = np.random.default_rng()\n",
"N = 100_000\n",
"x = rng.normal(125, 11.4, N)\n",
"\n",
"jmr = 1.03\n",
"jmr_up = 1.07\n",
"\n",
"smearing = np.random.normal(size=N)\n",
"# gaussian with\n",
"jmr_nom = (smearing * max(jmr - 1, 0)) + 1\n",
"jmr_up = (smearing * max(jmr_up - 1, 0)) + 1\n",
"\n",
"plt.hist(x, bins=np.linspace(100, 150, 20), histtype=\"step\", label=r\"$N(\\mu=0, \\sigma=1)$\")\n",
"plt.hist(x * jmr_nom, bins=np.linspace(100, 150, 20), histtype=\"step\", label=r\"1.03 smearing\")\n",
"plt.hist(x * jmr_up, bins=np.linspace(100, 150, 20), histtype=\"step\", label=r\"1.07 smearing\")\n",
"plt.legend()\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "python310",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
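
For quick reference outside the notebook JSON, here is a self-contained sketch of the comparison done in `checkJMR.ipynb` above: it draws a toy mass peak with the same parameters as the cells (mean 125, width 11.4, JMR 1.03 nominal / 1.07 up, 0.09 relative resolution), applies both smearing conventions, and prints how much each inflates the width. The seed and variable names are illustrative only.

```python
import numpy as np

rng = np.random.default_rng(seed=0)
N = 100_000
mass = rng.normal(125, 11.4, N)  # toy mass peak, as in the notebook cells

jmr, jmr_up = 1.03, 1.07  # nominal and shifted resolution scale factors

# shared random draws so the nominal and up variations stay correlated
smearing_rel = rng.normal(scale=0.09, size=N)  # width ~ relative mass resolution (11.4 / 125)
smearing_unit = rng.normal(size=N)

# "TWiki formula": relative-resolution gaussian scaled by sqrt(jmr^2 - 1)
twiki_nom = 1 + smearing_rel * np.sqrt(max(jmr**2 - 1, 0))
twiki_up = 1 + smearing_rel * np.sqrt(max(jmr_up**2 - 1, 0))

# "current Gaussian method": unit-width gaussian scaled by (jmr - 1)
gauss_nom = 1 + smearing_unit * max(jmr - 1, 0)
gauss_up = 1 + smearing_unit * max(jmr_up - 1, 0)

for label, factor in [
    ("TWiki 1.03", twiki_nom),
    ("TWiki 1.07", twiki_up),
    ("Gaussian 1.03", gauss_nom),
    ("Gaussian 1.07", gauss_up),
]:
    ratio = np.std(mass * factor) / np.std(mass)
    print(f"{label}: std(smeared) / std(nominal) = {ratio:.3f}")
```

Comparing the printed ratios against the target JMR values shows directly which convention reproduces the intended width inflation.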
87 changes: 59 additions & 28 deletions src/HHbbVV/postprocessing/PostProcess.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -13,7 +13,7 @@
"from collections import OrderedDict\n",
"\n",
"from utils import CUT_MAX_VAL, ShapeVar\n",
"from hh_vars import (\n",
"from HHbbVV.hh_vars import (\n",
" years,\n",
" data_key,\n",
" qcd_key,\n",
@@ -56,7 +56,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -66,18 +66,13 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"MAIN_DIR = \"../../../\"\n",
"# samples_dir = f\"{MAIN_DIR}/../data/skimmer/24Feb25_update_skimmer\"\n",
"# signal_samples_dir = f\"{MAIN_DIR}/../data/skimmer/Jun10\"\n",
"# bdt_preds_dir = f\"{MAIN_DIR}/../data/skimmer/Feb24/23_05_12_multiclass_rem_feats_3/inferences/\"\n",
"# samples_dir = \"/eos/uscms/store/user/rkansal/bbVV/skimmer/Feb24/\"\n",
"# signal_samples_dir = \"/eos/uscms/store/user/cmantill/bbVV/skimmer/Jun10/\"\n",
"# bdt_data_dir = \"/eos/uscms/store/user/cmantill/bbVV/skimmer/Jun10/bdt_data/\"\n",
"year = \"2017\"\n",
"samples_dir = f\"{MAIN_DIR}/../data/skimmer/24Mar14UpdateData\"\n",
"year = \"2018\"\n",
"\n",
"date = \"24Feb28\"\n",
"plot_dir = f\"../../../plots/PostProcessing/{date}/\"\n",
@@ -100,20 +95,11 @@
"outputs": [],
"source": [
"# add both VBF keys just in case (the unused one will be removed below)\n",
"nonres_samples[\"qqHH_CV_1_C2V_1_kl_1_HHbbVV\"] = \"VBF_HHTobbVV_CV_1_C2V_1_C3_1\"\n",
"\n",
"with open(f\"{bdt_preds_dir}/{year}/sample_order.txt\", \"r\") as f:\n",
" BDT_sample_order = list(eval(f.read()).keys())\n",
"\n",
"for key in nonres_sig_keys.copy():\n",
" if key not in BDT_sample_order:\n",
" del nonres_samples[key]\n",
" nonres_sig_keys.remove(key)\n",
"\n",
"for key in bg_keys.copy():\n",
" if key not in BDT_sample_order:\n",
" del samples[key]\n",
" bg_keys.remove(key)"
"nonres_samples = {\n",
" \"HHbbVV\": \"GluGluToHHTobbVV_node_cHHH1\",\n",
" \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\": \"VBF_HHTobbVV_CV_1_C2V_0_C3_1\",\n",
" \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\": \"VBF_HHTobbVV_CV_1_C2V_2_C3_1\",\n",
"}"
]
},
{
@@ -130,7 +116,7 @@
"metadata": {},
"outputs": [],
"source": [
"filters = postprocessing.new_filters\n",
"filters = postprocessing.load_filters\n",
"systematics = {year: {}}\n",
"\n",
"# save cutflow as pandas table\n",
@@ -140,14 +126,59 @@
"\n",
"# no HEM cleaning for now because it wasn't done for BDT samples\n",
"events_dict = postprocessing.load_samples(\n",
" samples_dir, nonres_samples, year, filters, hem_cleaning=True\n",
" samples_dir, nonres_samples, year, filters, hem_cleaning=False\n",
")\n",
"# events_dict |= utils.load_samples(samples_dir, samples, year, filters, hem_cleaning=False)\n",
"\n",
"utils.add_to_cutflow(events_dict, \"BDTPreselection\", \"weight\", cutflow)\n",
"# utils.add_to_cutflow(events_dict, \"BDTPreselection\", \"weight\", cutflow)\n",
"cutflow"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Checking mass resolution and smearing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_mass(events, shift=\"\", i=0):\n",
" if shift == \"\":\n",
" mass = events[\"ak8FatJetParticleNetMass\"][i]\n",
" elif shift == \"up\":\n",
" mass = events[\"ak8FatJetParticleNetMass_JMR_up\"][i]\n",
" elif shift == \"down\":\n",
" mass = events[\"ak8FatJetParticleNetMass_JMR_down\"][i]\n",
"\n",
" mass = mass[(mass > 100) * (mass < 150)]\n",
" return mass"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"events = events_dict[\"qqHH_CV_1_C2V_0_kl_1_HHbbVV\"]\n",
"events = events_dict[\"qqHH_CV_1_C2V_2_kl_1_HHbbVV\"]\n",
"events = events_dict[\"HHbbVV\"]\n",
"for shift in [\"\", \"up\", \"down\"]:\n",
" print(shift)\n",
" mass = get_mass(events, shift)\n",
" print(\"mean\", np.mean(mass))\n",
" print(\"std\", np.std(mass))\n",
" print(\"\")\n",
"\n",
"# print(np.std(events[\"ak8FatJetParticleNetMass_JMR_up\"][0]) / np.std(events[\"ak8FatJetParticleNetMass\"][0]))\n",
"# print(np.std(events[\"ak8FatJetParticleNetMass\"][0]) / np.std(events[\"ak8FatJetParticleNetMass_JMR_down\"][0]))"
]
},
{
"attachments": {},
"cell_type": "markdown",

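Relatedly, the mass-resolution check added to `PostProcess.ipynb` can be summarised outside the notebook as a small helper. This is only a sketch: it assumes an `events` table that supports `events[column][i]` lookups with the same `ak8FatJetParticleNetMass*` column names as in the diff, and the helper names are hypothetical.

```python
import numpy as np

# Column names taken from the PostProcess.ipynb diff above.
COLUMNS = {
    "": "ak8FatJetParticleNetMass",
    "up": "ak8FatJetParticleNetMass_JMR_up",
    "down": "ak8FatJetParticleNetMass_JMR_down",
}


def mass_in_window(events, shift="", i=0, lo=100, hi=150):
    """Return the (shifted) jet mass restricted to the [lo, hi] GeV window."""
    mass = np.asarray(events[COLUMNS[shift]][i])
    return mass[(mass > lo) & (mass < hi)]


def jmr_summary(events):
    """Print mean/std of the nominal and JMR-shifted masses and the width ratios."""
    masses = {shift: mass_in_window(events, shift) for shift in COLUMNS}
    for shift, mass in masses.items():
        print(f"{shift or 'nominal'}: mean = {np.mean(mass):.2f}, std = {np.std(mass):.2f}")
    print("up / nominal std ratio:", np.std(masses["up"]) / np.std(masses[""]))
    print("nominal / down std ratio:", np.std(masses[""]) / np.std(masses["down"]))
```

The two printed ratios correspond to the commented-out checks at the end of the notebook cell above.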