diff --git a/README.md b/README.md index cfc65d5f..988b10ff 100644 --- a/README.md +++ b/README.md @@ -407,11 +407,14 @@ python PlotFits.py --fit-file "cards/test_tied_stats/fitDiagnosticsBlindedBkgOnl ### CMSSW + Combine Quickstart +**Warning: this should be done outside of your conda/mamba environment!** + ```bash +source /cvmfs/cms.cern.ch/cmsset_default.sh cmsrel CMSSW_11_3_4 cd CMSSW_11_3_4/src cmsenv -# git clone -b regex-float-parameters https://github.com/rkansal47/HiggsAnalysis-CombinedLimit.git HiggsAnalysis/CombinedLimit +# git clone -b main https://github.com/rkansal47/HiggsAnalysis-CombinedLimit.git HiggsAnalysis/CombinedLimit # float regex PR was merged so we should be able to switch to the main branch now: git clone -b v9.2.0 https://github.com/cms-analysis/HiggsAnalysis-CombinedLimit.git HiggsAnalysis/CombinedLimit git clone -b v2.0.0 https://github.com/cms-analysis/CombineHarvester.git CombineHarvester diff --git a/src/HHbbVV/corrections/checkJMR.ipynb b/src/HHbbVV/corrections/checkJMR.ipynb new file mode 100644 index 00000000..9efaff44 --- /dev/null +++ b/src/HHbbVV/corrections/checkJMR.ipynb @@ -0,0 +1,107 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.ticker as mticker\n", + "import mplhep as hep\n", + "\n", + "plt.style.use(hep.style.CMS)\n", + "hep.style.use(\"CMS\")\n", + "formatter = mticker.ScalarFormatter(useMathText=True)\n", + "formatter.set_powerlimits((-3, 3))\n", + "plt.rcParams.update({\"font.size\": 24})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Smearing using TWiki formula" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rng = np.random.default_rng()\n", + "N = 100_000\n", + "x = rng.normal(125, 11.4, N)\n", + "\n", + "jmr = 1.03\n", + "jmr_up = 1.07\n", + "\n", + "smearing = np.random.normal(scale=0.09, size=N)\n", + "# gaussian with\n", + "jmr_nom = (smearing * max(np.sqrt(jmr**2 - 1), 0)) + 1\n", + "jmr_up = (smearing * max(np.sqrt(jmr_up**2 - 1), 0)) + 1\n", + "\n", + "plt.hist(x, bins=np.linspace(100, 150, 20), histtype=\"step\", label=r\"$N(\\mu=0, \\sigma=1)$\")\n", + "plt.hist(x * jmr_nom, bins=np.linspace(100, 150, 20), histtype=\"step\", label=r\"1.03 smearing\")\n", + "plt.hist(x * jmr_up, bins=np.linspace(100, 150, 20), histtype=\"step\", label=r\"1.07 smearing\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Smearing using current Gaussian method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rng = np.random.default_rng()\n", + "N = 100_000\n", + "x = rng.normal(125, 11.4, N)\n", + "\n", + "jmr = 1.03\n", + "jmr_up = 1.07\n", + "\n", + "smearing = np.random.normal(size=N)\n", + "# gaussian with\n", + "jmr_nom = (smearing * max(jmr - 1, 0)) + 1\n", + "jmr_up = (smearing * max(jmr_up - 1, 0)) + 1\n", + "\n", + "plt.hist(x, bins=np.linspace(100, 150, 20), histtype=\"step\", label=r\"$N(\\mu=0, \\sigma=1)$\")\n", + "plt.hist(x * jmr_nom, bins=np.linspace(100, 150, 20), histtype=\"step\", label=r\"1.03 smearing\")\n", + "plt.hist(x * jmr_up, bins=np.linspace(100, 150, 20), histtype=\"step\", label=r\"1.07 smearing\")\n", + "plt.legend()\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python310", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/HHbbVV/postprocessing/PostProcess.ipynb b/src/HHbbVV/postprocessing/PostProcess.ipynb index a864e71e..eec91bcd 100644 --- a/src/HHbbVV/postprocessing/PostProcess.ipynb +++ b/src/HHbbVV/postprocessing/PostProcess.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -13,7 +13,7 @@ "from collections import OrderedDict\n", "\n", "from utils import CUT_MAX_VAL, ShapeVar\n", - "from hh_vars import (\n", + "from HHbbVV.hh_vars import (\n", " years,\n", " data_key,\n", " qcd_key,\n", @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -66,18 +66,13 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "MAIN_DIR = \"../../../\"\n", - "# samples_dir = f\"{MAIN_DIR}/../data/skimmer/24Feb25_update_skimmer\"\n", - "# signal_samples_dir = f\"{MAIN_DIR}/../data/skimmer/Jun10\"\n", - "# bdt_preds_dir = f\"{MAIN_DIR}/../data/skimmer/Feb24/23_05_12_multiclass_rem_feats_3/inferences/\"\n", - "# samples_dir = \"/eos/uscms/store/user/rkansal/bbVV/skimmer/Feb24/\"\n", - "# signal_samples_dir = \"/eos/uscms/store/user/cmantill/bbVV/skimmer/Jun10/\"\n", - "# bdt_data_dir = \"/eos/uscms/store/user/cmantill/bbVV/skimmer/Jun10/bdt_data/\"\n", - "year = \"2017\"\n", + "samples_dir = f\"{MAIN_DIR}/../data/skimmer/24Mar14UpdateData\"\n", + "year = \"2018\"\n", "\n", "date = \"24Feb28\"\n", "plot_dir = f\"../../../plots/PostProcessing/{date}/\"\n", @@ -100,20 +95,11 @@ "outputs": [], "source": [ "# add both VBF keys just in case (the unused one will be removed below)\n", - "nonres_samples[\"qqHH_CV_1_C2V_1_kl_1_HHbbVV\"] = \"VBF_HHTobbVV_CV_1_C2V_1_C3_1\"\n", - "\n", - "with open(f\"{bdt_preds_dir}/{year}/sample_order.txt\", \"r\") as f:\n", - " BDT_sample_order = list(eval(f.read()).keys())\n", - "\n", - "for key in nonres_sig_keys.copy():\n", - " if key not in BDT_sample_order:\n", - " del nonres_samples[key]\n", - " nonres_sig_keys.remove(key)\n", - "\n", - "for key in bg_keys.copy():\n", - " if key not in BDT_sample_order:\n", - " del samples[key]\n", - " bg_keys.remove(key)" + "nonres_samples = {\n", + " \"HHbbVV\": \"GluGluToHHTobbVV_node_cHHH1\",\n", + " \"qqHH_CV_1_C2V_0_kl_1_HHbbVV\": \"VBF_HHTobbVV_CV_1_C2V_0_C3_1\",\n", + " \"qqHH_CV_1_C2V_2_kl_1_HHbbVV\": \"VBF_HHTobbVV_CV_1_C2V_2_C3_1\",\n", + "}" ] }, { @@ -130,7 +116,7 @@ "metadata": {}, "outputs": [], "source": [ - "filters = postprocessing.new_filters\n", + "filters = postprocessing.load_filters\n", "systematics = {year: {}}\n", "\n", "# save cutflow as pandas table\n", @@ -140,14 +126,59 @@ "\n", "# no HEM cleaning for now because it wasn't done for BDT samples\n", "events_dict = postprocessing.load_samples(\n", - " samples_dir, nonres_samples, year, filters, hem_cleaning=True\n", + " samples_dir, nonres_samples, year, filters, hem_cleaning=False\n", ")\n", "# events_dict |= utils.load_samples(samples_dir, samples, year, filters, hem_cleaning=False)\n", "\n", - "utils.add_to_cutflow(events_dict, \"BDTPreselection\", \"weight\", cutflow)\n", + "# utils.add_to_cutflow(events_dict, \"BDTPreselection\", \"weight\", cutflow)\n", "cutflow" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Checking mass resolution and smearing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_mass(events, shift=\"\", i=0):\n", + " if shift == \"\":\n", + " mass = events[\"ak8FatJetParticleNetMass\"][i]\n", + " elif shift == \"up\":\n", + " mass = events[\"ak8FatJetParticleNetMass_JMR_up\"][i]\n", + " elif shift == \"down\":\n", + " mass = events[\"ak8FatJetParticleNetMass_JMR_down\"][i]\n", + "\n", + " mass = mass[(mass > 100) * (mass < 150)]\n", + " return mass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "events = events_dict[\"qqHH_CV_1_C2V_0_kl_1_HHbbVV\"]\n", + "events = events_dict[\"qqHH_CV_1_C2V_2_kl_1_HHbbVV\"]\n", + "events = events_dict[\"HHbbVV\"]\n", + "for shift in [\"\", \"up\", \"down\"]:\n", + " print(shift)\n", + " mass = get_mass(events, shift)\n", + " print(\"mean\", np.mean(mass))\n", + " print(\"std\", np.std(mass))\n", + " print(\"\")\n", + "\n", + "# print(np.std(events[\"ak8FatJetParticleNetMass_JMR_up\"][0]) / np.std(events[\"ak8FatJetParticleNetMass\"][0]))\n", + "# print(np.std(events[\"ak8FatJetParticleNetMass\"][0]) / np.std(events[\"ak8FatJetParticleNetMass_JMR_down\"][0]))" + ] + }, { "attachments": {}, "cell_type": "markdown",