submit bias

rkansal47 · Jul 10, 2023 · 2d3e213 · 2d3e213
1 parent f67eef7
commit 2d3e213
Show file tree

Hide file tree

Showing 16 changed files with 522 additions and 70 deletions.
diff --git a/README.md b/README.md
@@ -304,28 +304,49 @@ git clone -b v2.0.0 https://github.com/cms-analysis/CombineHarvester.git Combine
 scramv1 b clean; scramv1 b
 ```
 
+I also add this to my .bashrc for convenience:
+
+```
+export PATH="$PATH:/uscms_data/d1/rkansal/HHbbVV/src/HHbbVV/combine"
+
+csubmit() {
+    local file=$1; shift;
+    python "/uscms_data/d1/rkansal/HHbbVV/src/HHbbVV/combine/submit/submit_${file}.py" "$@"
+}
+```
+
 ### Run fits and diagnostics locally
 
 Everything is run through the script below, which takes a number of options (see the script itself for the full list):
 
 ```bash
-/uscms/home/rkansal/nobackup/HHbbVV/src/HHbbVV/combine/run_blinded.sh --workspace --bfit --limits
+run_blinded.sh --workspace --bfit --limits
 ```
 
 ### Run fits on condor
 
 Fits can be run over all the resonant signals (default), or working points can be scanned for a subset of signals (`--scan`):
 
 ```bash
-python src/HHbbVV/combine/submit.py --test --scan --resonant --templates-dir 23Apr30Scan
+csubmit cards --test --scan --resonant --templates-dir 23Apr30Scan
 ```
 
 Generate toys and fits for the F-tests (after making the cards and b-only fits):
 
 ```bash
-python src/HHbbVV/combine/submit_ftest.py --tag 23May2 --cards-tag 23May2 --low1 0 --low2 0
+csubmit f_test --tag 23May2 --cards-tag 23May2 --low1 0 --low2 0
 ```
 
+Bias tests:
+
+```bash
+for bias in 0 0.15 0.3
+do
+  csubmit bias --seed 42 --num-jobs 10 --toys-per-job 10 --bias $bias --submit
+done
+```
+
+
 ## Misc
 
 ### Command for copying directories to PRP in background 

diff --git a/src/HHbbVV/combine/binder/BiasTest.ipynb b/src/HHbbVV/combine/binder/BiasTest.ipynb
@@ -0,0 +1,244 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from typing import List\n",
+    "import uproot\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import mplhep as hep\n",
+    "import matplotlib.ticker as mticker\n",
+    "import os\n",
+    "\n",
+    "plt.style.use(hep.style.CMS)\n",
+    "hep.style.use(\"CMS\")\n",
+    "formatter = mticker.ScalarFormatter(useMathText=True)\n",
+    "formatter.set_powerlimits((-3, 3))\n",
+    "plt.rcParams.update({\"font.size\": 20})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "MAIN_DIR = \"../../../../\"\n",
+    "\n",
+    "plot_dir = f\"{MAIN_DIR}/plots/BiasTest/23Jul10Res\"\n",
+    "_ = os.system(f\"mkdir -p {plot_dir}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cards_dir = \"f_tests/23May2/nTF1_2_nTF2_1\"\n",
+    "# biases = [0., 0.15, 0.3, 1.0]\n",
+    "biases = [0.0, 0.15, 0.3, 1.0]\n",
+    "file = uproot.open(\n",
+    "    f\"/uscms/home/rkansal/hhcombine/cards/{cards_dir}/higgsCombinebias0.3.FitDiagnostics.mH125.*.root\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "r_dict = {\n",
+    "    0.3: {\n",
+    "        \"r\": np.array(file[\"limit\"][\"trackedParam_r\"])[::4],\n",
+    "        \"rerr\": np.array(file[\"limit\"][\"trackedError_r\"])[::4],\n",
+    "    }\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'r': array([0.25547278], dtype=float32),\n",
+       " 'rerr': array([0.17948698], dtype=float32)}"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "r_dict[0.3]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from scipy import stats\n",
+    "\n",
+    "\n",
+    "def plot_tests(\n",
+    "    data_ts: float,\n",
+    "    toy_ts: np.ndarray,\n",
+    "    name: str,\n",
+    "    title: str = None,\n",
+    "    bins: int = 15,\n",
+    "    fit: str = None,\n",
+    "    fdof2: int = None,\n",
+    "):\n",
+    "    plot_max = max(np.max(toy_ts), data_ts)\n",
+    "    # plot_max = max(np.max(toy_ts), data_ts) if fit != \"chi2\" else 200\n",
+    "    # plot_min = min(np.min(toy_ts), data_ts, 0)\n",
+    "    plot_min = 0\n",
+    "    pval = p_value(data_ts, toy_ts)\n",
+    "\n",
+    "    plt.figure(figsize=(12, 8))\n",
+    "    h = plt.hist(\n",
+    "        toy_ts,\n",
+    "        np.linspace(plot_min, plot_max, bins + 1),\n",
+    "        color=\"#8C8C8C\",\n",
+    "        histtype=\"step\",\n",
+    "        label=f\"{len(toy_ts)} Toys\",\n",
+    "    )\n",
+    "    plt.axvline(data_ts, color=\"#FF502E\", linestyle=\":\", label=rf\"Data ($p$-value = {pval:.2f})\")\n",
+    "\n",
+    "    if fit is not None:\n",
+    "        x = np.linspace(plot_min + 0.01, plot_max, 100)\n",
+    "\n",
+    "        if fit == \"chi2\":\n",
+    "            res = stats.fit(stats.chi2, toy_ts, [(0, 200)])\n",
+    "            pdf = stats.chi2.pdf(x, res.params.df)\n",
+    "            label = rf\"$\\chi^2_{{DoF = {res.params.df:.2f}}}$ Fit\"\n",
+    "        elif fit == \"f\":\n",
+    "            pdf = stats.f.pdf(x, 1, fdof2)\n",
+    "            label = rf\"$F-dist_{{DoF = (1, {fdof2})}}$\"\n",
+    "        else:\n",
+    "            raise ValueError(\"Invalid fit\")\n",
+    "\n",
+    "        plt.plot(\n",
+    "            x,\n",
+    "            pdf * (np.max(h[0]) / np.max(pdf)),\n",
+    "            color=\"#1f78b4\",\n",
+    "            linestyle=\"--\",\n",
+    "            # alpha=0.6,\n",
+    "            label=label,\n",
+    "        )\n",
+    "\n",
+    "    hep.cms.label(\n",
+    "        \"Work in Progress\",\n",
+    "        data=True,\n",
+    "        lumi=138,\n",
+    "        year=None,\n",
+    "    )\n",
+    "\n",
+    "    _ = plt.legend()\n",
+    "    plt.title(title)\n",
+    "    plt.ylabel(\"Number of Toys\")\n",
+    "    plt.xlabel(\"Test Statistics\")\n",
+    "\n",
+    "    plt.savefig(f\"{plot_dir}/{name}.pdf\", bbox_inches=\"tight\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Nonresonant"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "o1 = 0  # order being tested\n",
+    "tlabel = f\"{o1}\"\n",
+    "\n",
+    "data_ts, toy_ts = test_statistics[tlabel][\"data\"][tlabel], test_statistics[tlabel][\"toys\"][tlabel]\n",
+    "plot_tests(data_ts, toy_ts, \"gof\" + tlabel, fit=\"chi2\", bins=20)\n",
+    "\n",
+    "ord1 = 1\n",
+    "tflabel = f\"{ord1}\"\n",
+    "data_ts, toy_ts = pval = (\n",
+    "    test_statistics[tlabel][\"fdata\"][tflabel],\n",
+    "    test_statistics[tlabel][\"ftoys\"][tflabel],\n",
+    ")\n",
+    "plot_tests(data_ts, toy_ts, f\"f{tlabel}_{tflabel}\", title=f\"{o1} vs. {ord1}\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Resonant"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "o1, o2 = 2, 0  # order being tested\n",
+    "tlabel = f\"{o1}{o2}\"\n",
+    "\n",
+    "data_ts, toy_ts = test_statistics[tlabel][\"data\"][tlabel], test_statistics[tlabel][\"toys\"][tlabel]\n",
+    "plot_tests(data_ts, toy_ts, \"gof\" + tlabel, fit=\"chi2\", bins=20)\n",
+    "\n",
+    "for ord1, ord2 in [[o1 + 1, o2], [o1, o2 + 1]]:\n",
+    "    tflabel = f\"{ord1}{ord2}\"\n",
+    "    data_ts, toy_ts = pval = (\n",
+    "        test_statistics[tlabel][\"fdata\"][tflabel],\n",
+    "        test_statistics[tlabel][\"ftoys\"][tflabel],\n",
+    "    )\n",
+    "    plot_tests(data_ts, toy_ts, f\"f{tlabel}_{tflabel}\", title=f\"({o1}, {o2}) vs. ({ord1}, {ord2})\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python39",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.15"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/HHbbVV/combine/binder/F_test.ipynb b/src/HHbbVV/combine/binder/F_test.ipynb
@@ -29,7 +29,7 @@
    "source": [
     "MAIN_DIR = \"../../../../\"\n",
     "\n",
-    "plot_dir = f\"{MAIN_DIR}/plots/FTests/23May14\"\n",
+    "plot_dir = f\"{MAIN_DIR}/plots/FTests/23Jul6WP06\"\n",
     "_ = os.system(f\"mkdir -p {plot_dir}\")"
    ]
   },
@@ -120,9 +120,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "eos_cards_dir = \"/eos/uscms/store/user/rkansal/bbVV/cards/f_tests/23May2/\"\n",
-    "local_cards_dir = \"/uscms/home/rkansal/hhcombine/cards/f_tests/23May2/\"\n",
-    "test_orders = [(0, 0), (1, 0), (0, 1), (1, 1), (2, 1), (1, 2), (2, 0), (0, 2)]\n",
+    "eos_cards_dir = \"/eos/uscms/store/user/rkansal/bbVV/cards/f_tests/23Jul6WP06/\"\n",
+    "local_cards_dir = \"/uscms/home/rkansal/hhcombine/cards/f_tests/23Jul6WP06/\"\n",
+    "# test_orders = [(0, 0), (1, 0), (0, 1), (1, 1), (2, 1), (1, 2), (2, 0), (0, 2)]\n",
+    "test_orders = [(1, 2)]\n",
     "# test_orders = [(3, 1), (2, 2)]\n",
     "test_statistics = {}\n",
     "\n",
@@ -293,6 +294,16 @@
    "outputs": [],
    "source": []
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_ts, toy_ts = tdict[\"data\"][tlabel], tdict[\"toys\"][tlabel]\n",
+    "plot_tests(data_ts, toy_ts, \"gof\" + tlabel, fit=\"chi2\", bins=20)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,