diff --git a/src/HHbbVV/corrections/lp_ratios/signals/2018_TTToSemiLeptonic.hist b/src/HHbbVV/corrections/lp_ratios/signals/2018_TTToSemiLeptonic.hist new file mode 100644 index 00000000..26350442 Binary files /dev/null and b/src/HHbbVV/corrections/lp_ratios/signals/2018_TTToSemiLeptonic.hist differ diff --git a/src/HHbbVV/hh_vars.py b/src/HHbbVV/hh_vars.py index ae1cb108..c25e6b90 100644 --- a/src/HHbbVV/hh_vars.py +++ b/src/HHbbVV/hh_vars.py @@ -259,7 +259,13 @@ ("lp_sf_pt_extrap_vars", 100), ("lp_sf_sys_down", 1), ("lp_sf_sys_up", 1), + ("lp_sf_dist_down", 1), + ("lp_sf_dist_up", 1), + ("lp_sf_np_down", 1), + ("lp_sf_np_up", 1), ("lp_sf_double_matched_event", 1), - ("lp_sf_boundary_quarks", 1), + ("lp_sf_inside_boundary_quarks", 1), + ("lp_sf_outside_boundary_quarks", 1), ("lp_sf_unmatched_quarks", 1), + ("lp_sf_rc_unmatched_quarks", 1), ] diff --git a/src/HHbbVV/postprocessing/TopAnalysis.ipynb b/src/HHbbVV/postprocessing/TopAnalysis.ipynb new file mode 100644 index 00000000..2404d54b --- /dev/null +++ b/src/HHbbVV/postprocessing/TopAnalysis.ipynb @@ -0,0 +1,1176 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import utils\n", + "import plotting\n", + "import numpy as np\n", + "import warnings\n", + "import pandas as pd\n", + "from pathlib import Path\n", + "\n", + "from pandas.errors import SettingWithCopyWarning\n", + "from HHbbVV.hh_vars import data_key\n", + "import postprocessing\n", + "\n", + "# ignore these because they don't seem to apply\n", + "warnings.simplefilter(action=\"ignore\", category=SettingWithCopyWarning)\n", + "\n", + "from PyPDF2 import PdfMerger\n", + "\n", + "from copy import deepcopy\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import mplhep as hep\n", + "import matplotlib.ticker as mticker\n", + "\n", + "plt.style.use(hep.style.CMS)\n", + "hep.style.use(\"CMS\")\n", + "formatter = mticker.ScalarFormatter(useMathText=True)\n", + "formatter.set_powerlimits((-3, 3))\n", + "plt.rcParams.update({\"font.size\": 16})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_dir = Path(\"../../../plots/ttsfs/24Jul24BLDistortion\")\n", + "plot_dir.mkdir(parents=True, exist_ok=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load samples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bg_samples = {\n", + " \"QCD\": \"QCD\",\n", + " \"SingleTop\": [\"ST\"],\n", + " \"TTbar\": [\"TTTo2L2Nu\", \"TTToHadronic\"],\n", + " \"W+Jets\": \"WJets\",\n", + " \"Diboson\": [\"WW\", \"WZ\", \"ZZ\"],\n", + " \"Data\": \"SingleMuon\",\n", + "}\n", + "\n", + "sig_samples = {\"Top\": [\"TTToSemiLeptonic\"]}\n", + "\n", + "samples = {**bg_samples, **sig_samples}\n", + "\n", + "top_matched_key = \"TT Top Matched\"\n", + "\n", + "# data_dir = \"../../../../data/ttsfs/24Feb28_update_lp/\"\n", + "data_dir = \"/ceph/cms/store/user/rkansal/bbVV/ttsfs/24Feb28_update_lp/\"\n", + "signal_data_dir = \"/ceph/cms/store/user/rkansal/bbVV/ttsfs/24Jul23BLDistortion/\"\n", + "year = \"2018\"\n", + "\n", + "# filters = [(\"('ak8FatJetPt', '0')\", \">=\", 500)]\n", + "filters = None\n", + "\n", + "events_dict = postprocessing.load_samples(data_dir, bg_samples, year, hem_cleaning=False)\n", + 
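"# NOTE: the TTToSemiLeptonic signal skim (with the updated LP SF branches) lives in a\n",
+ "# separate directory, so it is loaded on its own and merged into the same dict below\n",
+ 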
"events_dict |= postprocessing.load_samples(signal_data_dir, sig_samples, year, hem_cleaning=False)\n", + "\n", + "cutflow = pd.DataFrame(index=list(samples.keys()))\n", + "utils.add_to_cutflow(events_dict, \"Selection\", \"weight\", cutflow)\n", + "cutflow" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "HEM Veto Efficiency" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tot_pre = 0\n", + "tot_hem = 0\n", + "for run in [\"A\", \"B\", \"C\", \"D\"]:\n", + " cf = utils.get_pickles(\n", + " f\"{data_dir}/{year}/SingleMuon_Run2018{run}/pickles\", year, f\"SingleMuon_Run2018{run}\"\n", + " )[\"cutflow\"]\n", + " tot_pre += cf[\"ak4_jet\"]\n", + " tot_hem += cf[\"hem_cleaning\"]\n", + "\n", + "print(tot_hem / tot_pre)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Normalization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "scale_samples = [\"Top\", \"TTbar\", \"SingleTop\", \"W+Jets\"] # normalizations are off\n", + "\n", + "total_scale = 0\n", + "total_noscale = 0\n", + "for sample, events in events_dict.items():\n", + " if sample in scale_samples:\n", + " total_scale += events[\"weight\"].sum().values[0]\n", + " elif sample != \"Data\":\n", + " total_noscale += events[\"weight\"].sum().values[0]\n", + "\n", + "print(f\"Total MC: {total_scale + total_noscale}\")\n", + "\n", + "sf = (events_dict[\"Data\"][\"weight\"].sum().values[0] - total_noscale) / total_scale\n", + "for sample, events in events_dict.items():\n", + " if sample in scale_samples:\n", + " events[\"weight\"] *= sf\n", + "\n", + "total = 0\n", + "for sample, events in events_dict.items():\n", + " if sample != \"Data\":\n", + " total += events[\"weight\"].sum().values[0]\n", + "\n", + "print(f\"New Total MC: {total}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "utils.add_to_cutflow(events_dict, \"Scale\", \"weight\", cutflow)\n", + "cutflow" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Selection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for key in events_dict:\n", + " events_dict[key] = events_dict[key][events_dict[key][\"ak8FatJetPt\"][0] >= 500]\n", + " events_dict[key] = events_dict[key][events_dict[key][\"ak8FatJetMsd\"][0] >= 125]\n", + " events_dict[key] = events_dict[key][events_dict[key][\"ak8FatJetMsd\"][0] <= 225]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "events_dict[top_matched_key] = events_dict[\"Top\"].loc[events_dict[\"Top\"][\"top_matched\"][0] == 1]\n", + "events_dict[\"TT W Matched\"] = events_dict[\"Top\"].loc[events_dict[\"Top\"][\"w_matched\"][0] == 1]\n", + "events_dict[\"TT Unmatched\"] = pd.concat(\n", + " [\n", + " events_dict[\"TTbar\"],\n", + " # events_dict[\"SingleTop\"],\n", + " events_dict[\"Top\"].loc[events_dict[\"Top\"][\"unmatched\"][0] == 1],\n", + " ]\n", + ")\n", + "# del events_dict[\"Top\"]\n", + "# del events_dict[\"TTbar\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(np.sum(events_dict[top_matched_key][\"weight\"]))\n", + "print(np.sum(events_dict[\"TT W Matched\"][\"weight\"]))\n", + "print(np.sum(events_dict[\"TT Unmatched\"][\"weight\"]))" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## LP SF Processing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "CLIP = 5.0\n", + "\n", + "events = events_dict[top_matched_key]\n", + "\n", + "up_prong_rc = (\n", + " (events[\"lp_sf_outside_boundary_quarks\"][0] > 0)\n", + " | (events[\"lp_sf_double_matched_event\"][0] > 0)\n", + " | (events[\"lp_sf_unmatched_quarks\"][0] > 0)\n", + ").to_numpy()\n", + "\n", + "down_prong_rc = (\n", + " (events[\"lp_sf_inside_boundary_quarks\"][0] > 0)\n", + " | (events[\"lp_sf_double_matched_event\"][0] > 0)\n", + " | (events[\"lp_sf_unmatched_quarks\"][0] > 0)\n", + ").to_numpy()\n", + "\n", + "rc_unmatched = events[\"lp_sf_rc_unmatched_quarks\"][0] > 0\n", + "\n", + "for shift, prong_rc in [(\"up\", up_prong_rc), (\"down\", down_prong_rc)]:\n", + " np_sfs = events[\"lp_sf_lnN\"][0].to_numpy()\n", + " np_sfs[prong_rc] = events[f\"lp_sf_np_{shift}\"][0][prong_rc]\n", + " np_sfs = np.nan_to_num(np.clip(np_sfs, 1.0 / CLIP, CLIP))\n", + " events.loc[:, (f\"lp_sf_np_{shift}\", 0)] = np_sfs / np.mean(np_sfs, axis=0)\n", + "\n", + "for shift in [\"up\", \"down\"]:\n", + " np_sfs = events[\"lp_sf_lnN\"][0].to_numpy()\n", + " np_sfs[rc_unmatched] = CLIP if shift == \"up\" else 1.0 / CLIP\n", + " np_sfs = np.nan_to_num(np.clip(np_sfs, 1.0 / CLIP, CLIP))\n", + " events[f\"lp_sf_unmatched_{shift}\"] = np_sfs / np.mean(np_sfs, axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(np.mean(rc_unmatched))\n", + "print(np.mean(events[\"lp_sf_unmatched_quarks\"][0] > 0))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Normalization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# normalize scale factors to average to 1\n", + "for key in [\n", + " # \"lp_sf\",\n", + " \"lp_sf_lnN\",\n", + " \"lp_sf_sys_down\",\n", + " \"lp_sf_sys_up\",\n", + " \"lp_sf_dist_down\",\n", + " \"lp_sf_dist_up\",\n", + " \"lp_sf_pt_extrap_vars\",\n", + " \"lp_sfs_bl_ratio\",\n", + "]:\n", + " # cut off at 5\n", + " events_dict[top_matched_key].loc[:, key] = np.clip(\n", + " events_dict[top_matched_key].loc[:, key].values, 1.0 / 5.0, 5.0\n", + " )\n", + "\n", + " if key == \"lp_sfs_bl_ratio\":\n", + " mean_lp_sfs = np.mean(\n", + " np.nan_to_num(\n", + " events_dict[top_matched_key][key][0] * events_dict[top_matched_key][\"lp_sf_lnN\"][0]\n", + " ),\n", + " axis=0,\n", + " )\n", + " else:\n", + " mean_lp_sfs = np.mean(np.nan_to_num(events_dict[top_matched_key][key]), axis=0)\n", + "\n", + " events_dict[top_matched_key].loc[:, key] = (\n", + " np.nan_to_num(events_dict[top_matched_key].loc[:, key]) / mean_lp_sfs\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.rcParams.update({\"font.size\": 24})\n", + "plt.figure(figsize=(12, 12))\n", + "_ = plt.hist(\n", + " events_dict[top_matched_key][\"lp_sf_lnN\"][10].values,\n", + " np.logspace(-4, 2, 101, base=10),\n", + " histtype=\"step\",\n", + ")\n", + "plt.xscale(\"log\")\n", + "# plt.yscale(\"log\")\n", + "plt.xlabel(\"LP SF\")\n", + "plt.title(\"Scale factor distribution\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# events = events_dict[top_matched_key]\n", + "# sj_matching_unc = (\n", + "# 
(np.sum(events[\"lp_sf_unmatched_quarks\"]) / (len(events) * 3))\n", + "# # OR of double matched and boundary quarks\n", + "# # >0.1 to avoid floating point errors\n", + "# + (\n", + "# np.sum((events[\"lp_sf_double_matched_event\"] + events[\"lp_sf_boundary_quarks\"]) > 0.1)\n", + "# / (len(events))\n", + "# )\n", + "# ).values[0]\n", + "# sj_matching_unc" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tesing distortion uncertainty" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "\n", + "# package_path = Path(__file__).parent.parent.resolve()\n", + "package_path = Path(\"../\").resolve()\n", + "\n", + "for key in [\n", + " \"TTToSemiLeptonic\",\n", + " # \"ST_tW_antitop_5f_NoFullyHadronicDecays\",\n", + " # \"ST_tW_top_5f_NoFullyHadronicDecays\",\n", + " # \"ST_s-channel_4f_leptonDecays\",\n", + " # \"ST_t-channel_antitop_4f_InclusiveDecays\",\n", + " # \"ST_t-channel_top_4f_InclusiveDecays\",\n", + "]:\n", + " sig_lp_hist = utils.get_pickles(f\"{signal_data_dir}/{year}/{key}/pickles\", year, key)[\"lp_hist\"]\n", + "\n", + " # remove negatives\n", + " sig_lp_hist.values()[sig_lp_hist.values() < 0] = 0\n", + "\n", + " with (package_path / f\"corrections/lp_ratios/signals/{year}_{key}.hist\").open(\"wb\") as f:\n", + " pickle.dump(sig_lp_hist, f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sig_lp_hist.values()[sig_lp_hist.values() <= 0] = 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sig_mc_ratio = np.nan_to_num((sig_lp_hist.values() / sig_tot) / (mc_nom / mc_tot), nan=1.0)\n", + "sig_mc_ratio[sig_mc_ratio == 0] = 1.0\n", + "sig_mc_ratio = np.clip(sig_mc_ratio, 0.5, 2.0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import uproot\n", + "\n", + "# initialize lund plane scale factors lookups\n", + "f = uproot.open(package_path / f\"corrections/lp_ratios/ratio_{year[:4]}.root\")\n", + "\n", + "# 3D histogram: [subjet_pt, ln(0.8/Delta), ln(kT/GeV)]\n", + "mc_nom = f[\"mc_nom\"].to_numpy()\n", + "ratio_edges = mc_nom[1:]\n", + "mc_nom = mc_nom[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.imshow(sig_lp_hist.values()[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.imshow(mc_nom[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.imshow(sig_mc_ratio_pt[5])\n", + "plt.colorbar()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mc_tot = np.sum(mc_nom)\n", + "sig_tot = sig_lp_hist.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sig_tot_pt = np.sum(sig_lp_hist.values(), axis=(1, 2), keepdims=True)\n", + "mc_tot_pt = np.sum(mc_nom, axis=(1, 2), keepdims=True)\n", + "sig_mc_ratio_pt = np.nan_to_num((sig_lp_hist.values() / sig_tot_pt) / (mc_nom / mc_tot_pt), nan=1.0)\n", + "sig_mc_ratio_pt[sig_mc_ratio_pt == 0] = 1.0\n", + "sig_mc_ratio_pt = np.clip(sig_mc_ratio_pt, 0.5, 2.0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sig_mc_ratio = np.clip(\n", 
+ " np.nan_to_num((sig_lp_hist.values() / sig_tot) / (mc_nom / mc_tot), nan=1), 0.5, 2.0\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot_samples = [\n", + " \"QCD\",\n", + " \"Diboson\",\n", + " # \"Single Top\",\n", + " \"W+Jets\",\n", + " \"TT Unmatched\",\n", + " \"TT W Matched\",\n", + " top_matched_key,\n", + "]\n", + "\n", + "bg_colours = {\n", + " \"QCD\": \"lightblue\",\n", + " \"Single Top\": \"darkblue\",\n", + " \"TT Unmatched\": \"darkgreen\",\n", + " \"TT W Matched\": \"green\",\n", + " \"TT Top Matched\": \"orange\",\n", + " \"W+Jets\": \"darkred\",\n", + " \"Diboson\": \"red\",\n", + "}" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pre plots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# {var: (bins, label)}\n", + "plot_vars = {\n", + " # \"ak8FatJetMsd\": ([20, 125, 225], r\"$m_{SD}$ (GeV)\"),\n", + " # \"ak8FatJetParticleNetMass\": ([30, 50, 200], r\"$m_{reg}$ (GeV)\"),\n", + " # \"ak8FatJetPt\": ([30, 0, 1200], r\"$p_T$ (GeV)\"),\n", + " # \"MET_pt\": ([30, 0, 200], r\"MET (GeV)\"),\n", + " # \"ak8FatJetnPFCands\": ([20, 0, 120], r\"# of PF Candidates\"),\n", + " # \"ak8FatJetParticleNet_Th4q\": ([20, 0.6, 1], r\"ParticleNet $T_{H4q}$ Non-MD\"),\n", + " \"ak8FatJetParTMD_THWW4q\": ([20, 0.2, 1], r\"$T_{HVV}$\"),\n", + " # \"tau21\": ([20, 0.04, 0.8], r\"$\\tau_{21}$\"),\n", + " # \"tau32\": ([20, 0.2, 1], r\"$\\tau_{32}$\"),\n", + " # \"tau43\": ([20, 0.42, 1], r\"$\\tau_{43}$\"),\n", + " # \"tau42\": ([20, 0, 1], r\"$\\tau_{42}$\"),\n", + " # \"tau41\": ([20, 0, 1], r\"$\\tau_{41}$\"),\n", + "}\n", + "\n", + "pre_hists = {}\n", + "\n", + "for var, (bins, label) in plot_vars.items():\n", + " if var not in pre_hists:\n", + " pre_hists[var] = utils.singleVarHistNoMask(\n", + " events_dict, var, bins, label, weight_key=\"weight\"\n", + " )\n", + "\n", + "merger_pre_plots = PdfMerger()\n", + "\n", + "for var, var_hist in pre_hists.items():\n", + " name = f\"{plot_dir}/pre_{var}.pdf\"\n", + " plotting.ratioLinePlot(\n", + " var_hist,\n", + " plot_samples,\n", + " year,\n", + " # bg_err=None,\n", + " name=name,\n", + " bg_colours=bg_colours,\n", + " # bg_order=plot_samples,\n", + " # ratio_ylims=[0.6, 1.3],\n", + " )\n", + " merger_pre_plots.append(name)\n", + "\n", + "merger_pre_plots.write(f\"{plot_dir}/PrePlots.pdf\")\n", + "merger_pre_plots.close()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Post plots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "post_hists = {}\n", + "post_hists_err = {}\n", + "uncs_dict = {}\n", + "\n", + "events = events_dict[top_matched_key]\n", + "\n", + "for var, (bins, label) in plot_vars.items():\n", + " # if var not in post_hists:\n", + " toy_hists = []\n", + " for i in range(events[\"lp_sf\"].shape[1]):\n", + " toy_hists.append(\n", + " np.histogram(\n", + " events[var][0].values.squeeze(),\n", + " np.linspace(*bins[1:], bins[0] + 1),\n", + " weights=events[\"weight\"][0].values * events[\"lp_sf\"][i].values,\n", + " )[0]\n", + " )\n", + "\n", + " sys_up_down = []\n", + " for key in [\"lp_sf_sys_up\", \"lp_sf_sys_down\"]:\n", + " sys_up_down.append(\n", + " np.histogram(\n", + " events[var][0].values.squeeze(),\n", + " 
np.linspace(*bins[1:], bins[0] + 1),\n", + " weights=events[\"weight\"][0].values * events[key][0].values,\n", + " )[0]\n", + " )\n", + "\n", + " np_up_down = []\n", + " for key in [\"lp_sf_np_up\", \"lp_sf_np_down\"]:\n", + " np_up_down.append(\n", + " np.histogram(\n", + " events[var][0].values.squeeze(),\n", + " np.linspace(*bins[1:], bins[0] + 1),\n", + " weights=events[\"weight\"][0].values * events[key][0].values,\n", + " )[0]\n", + " )\n", + "\n", + " um_up_down = []\n", + " for key in [\"lp_sf_unmatched_up\", \"lp_sf_unmatched_down\"]:\n", + " um_up_down.append(\n", + " np.histogram(\n", + " events[var][0].values.squeeze(),\n", + " np.linspace(*bins[1:], bins[0] + 1),\n", + " weights=events[\"weight\"][0].values * events[key].values,\n", + " )[0]\n", + " )\n", + "\n", + " nom_vals = toy_hists[0] # first column are nominal values\n", + "\n", + " pt_toy_hists = []\n", + " for i in range(events[\"lp_sf_pt_extrap_vars\"].shape[1]):\n", + " pt_toy_hists.append(\n", + " np.histogram(\n", + " events[var][0].values.squeeze(),\n", + " np.linspace(*bins[1:], bins[0] + 1),\n", + " weights=events[\"weight\"][0].values * events[\"lp_sf_pt_extrap_vars\"][i].values,\n", + " )[0]\n", + " )\n", + "\n", + " b_ratio_hist = np.histogram(\n", + " events[var][0].values.squeeze(),\n", + " np.linspace(*bins[1:], bins[0] + 1),\n", + " weights=events[\"weight\"][0].values\n", + " * events[\"lp_sfs_bl_ratio\"][0].values\n", + " * events[\"lp_sf_lnN\"][0].values,\n", + " )[0]\n", + "\n", + " uncs = {\n", + " \"stat_unc\": np.minimum(nom_vals, np.std(toy_hists[1:], axis=0)), # cap at 100% unc\n", + " \"syst_rat_unc\": np.minimum(nom_vals, (np.abs(sys_up_down[0] - sys_up_down[1])) / 2),\n", + " \"np_unc\": np.minimum(nom_vals, (np.abs(np_up_down[0] - np_up_down[1])) / 2),\n", + " \"um_unc\": np.minimum(nom_vals, (np.abs(um_up_down[0] - um_up_down[1])) / 2),\n", + " # \"syst_sjm_unc\": nom_vals * sj_matching_unc,\n", + " \"syst_sjpt_unc\": np.minimum(nom_vals, np.std(pt_toy_hists, axis=0)),\n", + " \"syst_b_unc\": np.abs(1 - (b_ratio_hist / nom_vals)) * nom_vals,\n", + " }\n", + "\n", + " # uncs = {}\n", + "\n", + " # for i, shift in enumerate([\"up\", \"down\"]):\n", + " # uncs[shift] = {\n", + " # \"syst_rat_unc\": np.clip(sys_up_down[i], 0, 2 * nom_vals),\n", + " # \"np_unc\": np.clip(np_up_down[i], 0, 2 * nom_vals),\n", + " # \"um_unc\": np.clip(um_up_down[i], 0, 2 * nom_vals),\n", + " # }\n", + "\n", + " # uncs[shift]\n", + "\n", + " # for key, val in uncs_symm.items():\n", + " # if shift == \"up\":\n", + " # uncs[shift][key] = nom_vals + val\n", + " # else:\n", + " # uncs[shift][key] = nom_vals - val\n", + "\n", + " uncs_dict[var] = uncs\n", + "\n", + " unc = np.linalg.norm(list(uncs.values()), axis=0)\n", + "\n", + " thist = deepcopy(pre_hists[var])\n", + " top_matched_key_index = np.where(np.array(list(thist.axes[0])) == top_matched_key)[0][0]\n", + " thist.view(flow=False)[top_matched_key_index, :].value = nom_vals\n", + " post_hists[var] = thist\n", + " post_hists_err[var] = unc\n", + "\n", + "\n", + "merger_post_plots = PdfMerger()\n", + "\n", + "for var, var_hist in post_hists.items():\n", + " name = f\"{plot_dir}/post_{var}.pdf\"\n", + " plotting.ratioLinePlot(\n", + " var_hist,\n", + " plot_samples,\n", + " year,\n", + " bg_colours=bg_colours,\n", + " bg_err=post_hists_err[var],\n", + " name=name,\n", + " )\n", + " merger_post_plots.append(name)\n", + "\n", + "merger_post_plots.write(f\"{plot_dir}/PostPlots.pdf\")\n", + "merger_post_plots.close()" + ] + }, + { + "attachments": {}, + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "### Post LnN Plots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "post_lnN_hists = {}\n", + "post_lnN_hists_err = {}\n", + "uncs_lnN_dict = {}\n", + "\n", + "events = events_dict[top_matched_key]\n", + "\n", + "for var, (bins, label) in plot_vars.items():\n", + " if var not in post_lnN_hists:\n", + " toy_hists = []\n", + " for i in range(events[\"lp_sf_lnN\"].shape[1]):\n", + " toy_hists.append(\n", + " np.histogram(\n", + " events[var][0].values.squeeze(),\n", + " np.linspace(*bins[1:], bins[0] + 1),\n", + " weights=events[\"weight\"][0].values * events[\"lp_sf_lnN\"][i].values,\n", + " )[0]\n", + " )\n", + "\n", + " sys_up_down = []\n", + " for key in [\"lp_sf_sys_up\", \"lp_sf_sys_down\"]:\n", + " sys_up_down.append(\n", + " np.histogram(\n", + " events[var][0].values.squeeze(),\n", + " np.linspace(*bins[1:], bins[0] + 1),\n", + " weights=events[\"weight\"][0].values * events[key][0].values,\n", + " )[0]\n", + " )\n", + "\n", + " np_up_down = []\n", + " for key in [\"lp_sf_np_up\", \"lp_sf_np_down\"]:\n", + " np_up_down.append(\n", + " np.histogram(\n", + " events[var][0].values.squeeze(),\n", + " np.linspace(*bins[1:], bins[0] + 1),\n", + " weights=events[\"weight\"][0].values * np.nan_to_num(events[key][0].values),\n", + " )[0]\n", + " )\n", + "\n", + " dist_up_down = []\n", + " for key in [\"lp_sf_dist_up\", \"lp_sf_dist_down\"]:\n", + " dist_up_down.append(\n", + " np.histogram(\n", + " events[var][0].values.squeeze(),\n", + " np.linspace(*bins[1:], bins[0] + 1),\n", + " weights=events[\"weight\"][0].values * np.nan_to_num(events[key][0].values),\n", + " )[0]\n", + " )\n", + "\n", + " um_up_down = []\n", + " for key in [\"lp_sf_unmatched_up\", \"lp_sf_unmatched_down\"]:\n", + " um_up_down.append(\n", + " np.histogram(\n", + " events[var][0].values.squeeze(),\n", + " np.linspace(*bins[1:], bins[0] + 1),\n", + " weights=events[\"weight\"][0].values * np.nan_to_num(events[key].values),\n", + " )[0]\n", + " )\n", + "\n", + " nom_vals = toy_hists[0] # first column are nominal values\n", + "\n", + " pt_toy_hists = []\n", + " for i in range(events[\"lp_sf_pt_extrap_vars\"].shape[1]):\n", + " pt_toy_hists.append(\n", + " np.histogram(\n", + " events[var][0].values.squeeze(),\n", + " np.linspace(*bins[1:], bins[0] + 1),\n", + " weights=events[\"weight\"][0].values * events[\"lp_sf_pt_extrap_vars\"][i].values,\n", + " )[0]\n", + " )\n", + "\n", + " b_ratio_hist = np.histogram(\n", + " events[var][0].values.squeeze(),\n", + " np.linspace(*bins[1:], bins[0] + 1),\n", + " weights=events[\"weight\"][0].values\n", + " * events[\"lp_sfs_bl_ratio\"][0].values\n", + " * events[\"lp_sf_lnN\"][0].values,\n", + " )[0]\n", + "\n", + " uncs = {\n", + " \"stat_unc\": np.minimum(nom_vals, np.std(toy_hists[1:], axis=0)), # cap at 100% unc\n", + " \"syst_rat_unc\": np.minimum(nom_vals, (np.abs(sys_up_down[0] - sys_up_down[1])) / 2),\n", + " \"np_unc\": np.minimum(nom_vals, (np.abs(np_up_down[0] - np_up_down[1])) / 2),\n", + " \"dist_unc\": np.minimum(nom_vals, (np.abs(dist_up_down[0] - dist_up_down[1])) / 2),\n", + " \"um_unc\": np.minimum(nom_vals, (np.abs(um_up_down[0] - um_up_down[1])) / 2),\n", + " # \"syst_sjm_unc\": nom_vals * sj_matching_unc,\n", + " \"syst_sjpt_unc\": np.minimum(nom_vals, np.std(pt_toy_hists, axis=0)),\n", + " \"syst_b_unc\": np.abs(1 - (b_ratio_hist / nom_vals)) * nom_vals,\n", + " }\n", + "\n", + " uncs_lnN_dict[var] = uncs\n", + "\n", + " unc = 
np.linalg.norm(list(uncs.values()), axis=0)\n", + "\n", + " thist = deepcopy(pre_hists[var])\n", + " top_matched_key_index = np.where(np.array(list(thist.axes[0])) == top_matched_key)[0][0]\n", + " thist.view(flow=False)[top_matched_key_index, :].value = nom_vals\n", + " post_lnN_hists[var] = thist\n", + "\n", + " post_lnN_hists_err[var] = unc\n", + "\n", + "\n", + "merger_post_plots = PdfMerger()\n", + "\n", + "for var, var_hist in post_lnN_hists.items():\n", + " name = f\"{plot_dir}/postlnN_{var}.pdf\"\n", + " plotting.ratioLinePlot(\n", + " var_hist,\n", + " plot_samples,\n", + " year,\n", + " bg_colours=bg_colours,\n", + " bg_err=post_lnN_hists_err[var],\n", + " name=name,\n", + " )\n", + " merger_post_plots.append(name)\n", + "\n", + "merger_post_plots.write(f\"{plot_dir}/PostLnNPlots.pdf\")\n", + "merger_post_plots.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## SF Analysis" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "binn = -1\n", + "tvar = \"ak8FatJetParTMD_THWW4q\"\n", + "pre_vals = pre_hists[tvar].view(flow=False)[top_matched_key_index, :].value\n", + "nom_vals = post_hists[tvar].view(flow=False)[top_matched_key_index, :].value\n", + "unc = post_hists_err[tvar]\n", + "print(\"SF: \", nom_vals[binn] / pre_vals[binn])\n", + "print(\"Uncs: \", {key: val[binn] / nom_vals[binn] * 100 for key, val in uncs_dict[tvar].items()})\n", + "print(\"Combined: \", unc[binn] / nom_vals[binn] * 100)\n", + "print(\"Abs: \", unc[binn] / pre_vals[binn])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "binn = -1\n", + "tvar = \"ak8FatJetParTMD_THWW4q\"\n", + "pre_vals = pre_hists[tvar].view(flow=False)[top_matched_key_index, :].value\n", + "nom_vals = post_lnN_hists[tvar].view(flow=False)[top_matched_key_index, :].value\n", + "unc = post_lnN_hists_err[tvar]\n", + "print(\"SF: \", nom_vals[binn] / pre_vals[binn])\n", + "print(\"Uncs: \", {key: val[binn] / nom_vals[binn] * 100 for key, val in uncs_lnN_dict[tvar].items()})\n", + "print(\"Combined: \", unc[binn] / nom_vals[binn] * 100)\n", + "print(\"Abs: \", unc[binn] / pre_vals[binn])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Chi^2 improvement" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def chisquare(mc, data):\n", + " return np.sum(np.square(data - mc) / data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data_vals = pre_hists[tvar][\"Data\", ...].values()\n", + "pre_MC_vals = (\n", + " pre_hists[tvar][sum, :].values()\n", + " - data_vals\n", + " - pre_hists[tvar][\"TTbar\", :].values() # remove repeated data\n", + " - pre_hists[tvar][\"Top\", :].values()\n", + " - pre_hists[tvar][\"SingleTop\", :].values()\n", + ")\n", + "post_lnN_MC_vals = (\n", + " post_lnN_hists[tvar][sum, :].values()\n", + " - data_vals\n", + " - post_lnN_hists[tvar][\"TTbar\", :].values() # remove repeated data\n", + " - post_lnN_hists[tvar][\"Top\", :].values()\n", + " - post_lnN_hists[tvar][\"SingleTop\", :].values()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lb = 10\n", + "print(\"Pre chi2:\", chisquare(pre_MC_vals[-lb:], data_vals[-lb:]))\n", + "print(\"Post chi2:\", chisquare(post_lnN_MC_vals[-lb:], data_vals[-lb:]))" + ] + }, + { + 
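"cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The scale factor below is the ratio of the LP-reweighted (post) to the unweighted (pre) top-matched yield in the chosen $T_{HVV}$ bin (`binn = -1`, i.e. the last bin); each uncertainty is quoted relative to the post-reweighting yield."
+ ]
+ },
+ {
+ 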
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lb = 5\n", + "print(\"Pre chi2:\", chisquare(pre_MC_vals[-lb:], data_vals[-lb:]))\n", + "print(\"Post chi2:\", chisquare(post_lnN_MC_vals[-lb:], data_vals[-lb:]))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ratio plots" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tvar = \"ak8FatJetParTMD_THWW4q\"\n", + "\n", + "# plt.figure(figsize=(12, 12))\n", + "# hists = pre_hists[tvar]\n", + "# bg_tot = np.sum(hists[plot_samples, :].values(), axis=0)\n", + "# mcdata_ratio = (bg_tot + 1e-5) / hists[data_key, :].values()\n", + "# _ = plt.hist(mcdata_ratio - 1, np.linspace(-0.5, 0.5, 10), histtype='step')\n", + "\n", + "plt.figure(figsize=(12, 12))\n", + "hists = post_hists[tvar]\n", + "bg_tot = np.sum(hists[plot_samples, :].values(), axis=0)\n", + "data_tot = hists[data_key, :].values()\n", + "unc = post_hists_err[tvar]\n", + "mcdata_ratio = (bg_tot) / data_tot\n", + "_ = plt.hist(((bg_tot - data_tot) / (unc))[10:], np.linspace(-6.5, 4.5, 23), histtype=\"step\")\n", + "plt.xlabel(\"(MC - Data) / Unc.\")\n", + "plt.savefig(f\"{plot_dir}/pull_hist.pdf\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plotting.ratioLinePlot(\n", + " post_hists[tvar],\n", + " plot_samples,\n", + " year,\n", + " bg_err=post_hists_err[tvar],\n", + " name=f\"{plot_dir}/post_ak8FatJetParTMD_THWW4q_pulls.pdf\",\n", + " pulls=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cut_dict = {}\n", + "\n", + "for key in events_dict:\n", + " cut_dict[key] = events_dict[key][events_dict[key][\"tau42\"][0] <= 0.3]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# {var: (bins, label)}\n", + "plot_vars = {\n", + " \"ak8FatJetParTMD_THWW4q\": ([20, 0.6, 1], r\"ParT $T_{HWW4q}$ MD\"),\n", + "}\n", + "\n", + "pre_hists_cut = {}\n", + "\n", + "for var, (bins, label) in plot_vars.items():\n", + " if var not in pre_hists_cut:\n", + " pre_hists_cut[var] = utils.singleVarHistNoMask(\n", + " cut_dict, var, bins, label, weight_key=\"weight\"\n", + " )\n", + "\n", + "merger_pre_plots = PdfFileMerger()\n", + "\n", + "for var, var_hist in pre_hists_cut.items():\n", + " name = f\"{plot_dir}/pre_{var}_tau42_cut.pdf\"\n", + " plotting.ratioLinePlot(\n", + " var_hist,\n", + " plot_samples,\n", + " year,\n", + " bg_err=None,\n", + " name=name,\n", + " )\n", + " merger_pre_plots.append(name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "post_lnN_cut_hists = {}\n", + "post_lnN_cut_hists_err = {}\n", + "uncs_lnN_cut_dict = {}\n", + "\n", + "events = cut_dict[top_matched_key]\n", + "\n", + "for var, (bins, label) in plot_vars.items():\n", + " if var not in post_lnN_cut_hists:\n", + " toy_hists = []\n", + " for i in range(events[\"lp_sf_lnN\"].shape[1]):\n", + " toy_hists.append(\n", + " np.histogram(\n", + " events[var][0].values.squeeze(),\n", + " np.linspace(*bins[1:], bins[0] + 1),\n", + " weights=events[\"weight\"][0].values * events[\"lp_sf_lnN\"][i].values,\n", + " )[0]\n", + " )\n", + "\n", + " sys_up_down = []\n", + " for key in [\"lp_sf_sys_up\", \"lp_sf_sys_down\"]:\n", + " sys_up_down.append(\n", + " np.histogram(\n", + " 
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "post_lnN_cut_hists = {}\n",
+ "post_lnN_cut_hists_err = {}\n",
+ "uncs_lnN_cut_dict = {}\n",
+ "\n",
+ "events = cut_dict[top_matched_key]\n",
+ "\n",
+ "for var, (bins, label) in plot_vars.items():\n",
+ "    if var not in post_lnN_cut_hists:\n",
+ "        toy_hists = []\n",
+ "        for i in range(events[\"lp_sf_lnN\"].shape[1]):\n",
+ "            toy_hists.append(\n",
+ "                np.histogram(\n",
+ "                    events[var][0].values.squeeze(),\n",
+ "                    np.linspace(*bins[1:], bins[0] + 1),\n",
+ "                    weights=events[\"weight\"][0].values * events[\"lp_sf_lnN\"][i].values,\n",
+ "                )[0]\n",
+ "            )\n",
+ "\n",
+ "        sys_up_down = []\n",
+ "        for key in [\"lp_sf_sys_up\", \"lp_sf_sys_down\"]:\n",
+ "            sys_up_down.append(\n",
+ "                np.histogram(\n",
+ "                    events[var][0].values.squeeze(),\n",
+ "                    np.linspace(*bins[1:], bins[0] + 1),\n",
+ "                    weights=events[\"weight\"][0].values * events[key][0].values,\n",
+ "                )[0]\n",
+ "            )\n",
+ "\n",
+ "        nom_vals = toy_hists[0]  # first column holds the nominal values\n",
+ "\n",
+ "        uncs = {\n",
+ "            \"stat_unc\": np.minimum(nom_vals, np.std(toy_hists[1:], axis=0)),  # cap at 100% unc\n",
+ "            \"syst_rat_unc\": np.minimum(nom_vals, (np.abs(sys_up_down[0] - sys_up_down[1])) / 2),\n",
+ "            # sj_matching_unc and sj_pt_unc are no longer computed above (superseded by\n",
+ "            # the prong and pT-extrapolation variations), so skip these legacy terms\n",
+ "            # \"syst_sjm_unc\": nom_vals * sj_matching_unc,\n",
+ "            # \"syst_sjpt_unc\": nom_vals * sj_pt_unc,\n",
+ "        }\n",
+ "\n",
+ "        uncs_lnN_cut_dict[var] = uncs\n",
+ "\n",
+ "        unc = np.linalg.norm(list(uncs.values()), axis=0)\n",
+ "\n",
+ "        thist = deepcopy(pre_hists[var])\n",
+ "        top_matched_key_index = np.where(np.array(list(thist.axes[0])) == top_matched_key)[0][0]\n",
+ "        thist.view(flow=False)[top_matched_key_index, :].value = nom_vals\n",
+ "        post_lnN_cut_hists[var] = thist\n",
+ "\n",
+ "        post_lnN_cut_hists_err[var] = unc\n",
+ "\n",
+ "\n",
+ "merger_post_plots = PdfMerger()\n",
+ "\n",
+ "for var, var_hist in post_lnN_cut_hists.items():\n",
+ "    name = f\"{plot_dir}/postlnN_{var}_cut.pdf\"\n",
+ "    plotting.ratioLinePlot(\n",
+ "        var_hist,\n",
+ "        plot_samples,\n",
+ "        year,\n",
+ "        bg_err=post_lnN_cut_hists_err[var],\n",
+ "        name=name,\n",
+ "    )\n",
+ "    merger_post_plots.append(name)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "mass_hist = utils.singleVarHistNoMask(\n",
+ "    events_dict, \"ak8FatJetMsd\", [20, 125, 225], r\"$m_{SD}$\", weight_key=\"weight\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "plotting.ratioHistPlot(\n",
+ "    mass_hist,\n",
+ "    [\"QCD\", \"Diboson\", \"SingleTop\", \"W+Jets\", \"TT Unmatched\", \"TT W Matched\", top_matched_key],\n",
+ "    f\"{plot_dir}/\",\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3.10.4 ('python310')",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.15"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "31a7b1cb5f073f7a7d37b3db504c6954ce2b88e0f82e412b65ad0b5f2dd17394"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/HHbbVV/postprocessing/TopAnalysisLocal.ipynb b/src/HHbbVV/postprocessing/TopAnalysisLocal.ipynb
index 6a552431..18c2c9a9 100644
--- a/src/HHbbVV/postprocessing/TopAnalysisLocal.ipynb
+++ b/src/HHbbVV/postprocessing/TopAnalysisLocal.ipynb
@@ -13,7 +13,7 @@
 "import pandas as pd\n",
 "\n",
 "from pandas.errors import SettingWithCopyWarning\n",
- "from HHbbVV.hh_vars import data_key\n",
+ "from hh_vars import data_key\n",
 "import postprocessing\n",
 "\n",
 "# ignore these because they don't seem to apply\n",
@@ -50,7 +50,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
- "plot_dir = \"../../../plots/ttsfs/24Mar1_update_lps_test\"\n",
+ "plot_dir = \"../../../plots/ttsfs/24Mar1_update_lps\"\n",
 "\n",
 "import os\n",
 "\n",
@@ -169,17 +169,6 @@
 "# del events_dict[\"TTbar\"]"
 ]
 },
-{
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "print(np.sum(events_dict[top_matched_key][\"weight\"]))\n",
- "print(np.sum(events_dict[\"TT W Matched\"][\"weight\"]))\n",
"print(np.sum(events_dict[\"TT Unmatched\"][\"weight\"]))" - ] - }, { "cell_type": "code", "execution_count": null, @@ -316,7 +305,7 @@ "source": [ "# {var: (bins, label)}\n", "plot_vars = {\n", - " # \"ak8FatJetMsd\": ([20, 125, 225], r\"$m_{SD}$ (GeV)\"),\n", + " \"ak8FatJetMsd\": ([20, 125, 225], r\"$m_{SD}$ (GeV)\"),\n", " # \"ak8FatJetParticleNetMass\": ([30, 50, 200], r\"$m_{reg}$ (GeV)\"),\n", " # \"ak8FatJetPt\": ([30, 0, 1200], r\"$p_T$ (GeV)\"),\n", " # \"MET_pt\": ([30, 0, 200], r\"MET (GeV)\"),\n", diff --git a/src/HHbbVV/postprocessing/plotting.py b/src/HHbbVV/postprocessing/plotting.py index 20f57791..887b12bf 100644 --- a/src/HHbbVV/postprocessing/plotting.py +++ b/src/HHbbVV/postprocessing/plotting.py @@ -213,7 +213,7 @@ def _asimov_significance(s, b): return np.sqrt(2 * ((s + b) * np.log(1 + (s / b)) - s)) -def add_cms_label(ax, year, label=None): +def add_cms_label(ax, year, label="Preliminary"): if year == "all": hep.cms.label( label, @@ -621,6 +621,7 @@ def ratioLinePlot( alpha=0.2, hatch="//", linewidth=0, + label="Lund Plane Uncertainty", ) # if sig_key in hists: @@ -635,20 +636,31 @@ def ratioLinePlot( hep.histplot( hists[data_key, :], ax=ax, yerr=data_err, histtype="errorbar", label=data_key, color="black" ) - ax.legend(ncol=2) + + if bg_err is not None: + # Switch order so that uncertainty label comes at the end + handles, labels = ax.get_legend_handles_labels() + handles = handles[1:] + handles[:1] + labels = labels[1:] + labels[:1] + ax.legend(handles, labels, ncol=2) + else: + ax.legend(ncol=2) + ax.set_ylim(0, ax.get_ylim()[1] * 1.5) data_vals = hists[data_key, :].values() if not pulls: - datamc_ratio = data_vals / (bg_tot + 1e-5) + # datamc_ratio = data_vals / (bg_tot + 1e-5) - if bg_err == "ratio": - yerr = ratio_uncertainty(data_vals, bg_tot, "poisson") - elif bg_err is None: - yerr = 0 - else: - yerr = datamc_ratio * (bg_err / (bg_tot + 1e-8)) + # if bg_err == "ratio": + # yerr = ratio_uncertainty(data_vals, bg_tot, "poisson") + # elif bg_err is None: + # yerr = 0 + # else: + # yerr = datamc_ratio * (bg_err / (bg_tot + 1e-8)) + + yerr = ratio_uncertainty(data_vals, bg_tot, "poisson") hep.histplot( hists[data_key, :] / (sum([hists[sample, :] for sample in bg_keys]).values() + 1e-5), @@ -658,6 +670,19 @@ def ratioLinePlot( color="black", capsize=4, ) + + if bg_err is not None: + # (bkg + err) / bkg + rax.fill_between( + np.repeat(hists.axes[1].edges, 2)[1:-1], + np.repeat((bg_tot - bg_err) / bg_tot, 2), + np.repeat((bg_tot + bg_err) / bg_tot, 2), + color="black", + alpha=0.1, + hatch="//", + linewidth=0, + ) + rax.set_ylabel("Data/MC") rax.set_ylim(0.5, 1.5) rax.grid() diff --git a/src/HHbbVV/processors/TTScaleFactorsSkimmer.py b/src/HHbbVV/processors/TTScaleFactorsSkimmer.py index 6900f3db..1f409f43 100644 --- a/src/HHbbVV/processors/TTScaleFactorsSkimmer.py +++ b/src/HHbbVV/processors/TTScaleFactorsSkimmer.py @@ -18,6 +18,8 @@ from coffea.nanoevents.methods import vector from coffea.processor import dict_accumulator +from HHbbVV import hh_vars + from .corrections import ( add_btag_weights, add_lepton_weights, @@ -421,7 +423,10 @@ def process(self, events: ak.Array): # Lund Plane SFs ######################### - if dataset in ["SingleTop", "TTToSemiLeptonic", "TTToSemiLeptonic_ext1"]: + lp_hist = None + hh_vars.lp_sf_vars.append(("lp_sfs_bl_ratio", 1)) + + if dataset in ["TTToSemiLeptonic", "TTToSemiLeptonic_ext1"] or dataset.startswith("ST_"): match_dict, gen_quarks, had_bs = ttbar_scale_factor_matching( events, leading_fatjets[:, 0], selection_args ) @@ 
@@ -431,23 +436,27 @@
                 skimmed_events = {**skimmed_events, **match_dict}

                 if np.any(top_matched):
-                    sf_dict = get_lund_SFs(
+                    sf_dict_temp, lp_hist = get_lund_SFs(
                         year,
                         events[top_matched],
                         fatjets[top_matched],
                         fatjet_idx[top_matched].to_numpy(),
                         num_prongs,
                         gen_quarks[top_matched],
+                        weights_dict["weight"][top_matched],
                         trunc_gauss=True,
                         lnN=True,
                         gen_bs=had_bs[top_matched],  # do b/l ratio uncertainty for tops as well
+                        sample="TTToSemiLeptonic",
                     )

-                    # fill zeros for all non-top-matched events
-                    for key, val in list(sf_dict.items()):
-                        # plus 1 for the nominal values
-                        arr = np.zeros((len(events), val.shape[1]))
-                        arr[top_matched] = val
+                    sf_dict = {}
+
+                    # fill in 1s for non-top-matched jets
+                    for key, shape in hh_vars.lp_sf_vars:
+                        arr = np.ones((len(events), shape))
+                        arr[top_matched] = sf_dict_temp[key]
                         sf_dict[key] = arr

                     skimmed_events = {**skimmed_events, **sf_dict}
@@ -490,7 +501,13 @@
         )
         self.dump_table(pddf, fname)

-        return {year: {dataset: {"totals": totals_dict, "cutflow": cutflow}}}
+        ret_dict = {year: {dataset: {"totals": totals_dict, "cutflow": cutflow}}}
+
+        if lp_hist is not None:
+            ret_dict[year][dataset]["lp_hist"] = lp_hist
+
+        return ret_dict

     def postprocess(self, accumulator):
         return accumulator
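A note on the padding loop above (a sketch, not part of the patch): `get_lund_SFs` now returns SFs only for the top-matched subset of events, so each output column listed in `hh_vars.lp_sf_vars` is padded back to full event length with the neutral value 1 (the previous version padded with 0). A minimal, self-contained illustration of the idiom, with made-up shapes and random stand-in SF values:

```python
import numpy as np

# illustrative stand-ins for the skimmer's inputs
n_events = 8
top_matched = np.array([True, False, True, True, False, False, True, False])
lp_sf_vars = [("lp_sf_lnN", 101), ("lp_sf_sys_up", 1)]  # (name, n_columns)

rng = np.random.default_rng(42)
# SFs computed only for the top-matched subset, as get_lund_SFs would return them
sf_dict_temp = {key: rng.random((top_matched.sum(), ncols)) for key, ncols in lp_sf_vars}

# pad with the neutral SF value 1 for all non-top-matched events
sf_dict = {}
for key, ncols in lp_sf_vars:
    arr = np.ones((n_events, ncols))
    arr[top_matched] = sf_dict_temp[key]
    sf_dict[key] = arr

assert sf_dict["lp_sf_lnN"].shape == (n_events, 101)
```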
diff --git a/src/HHbbVV/processors/bbVVSkimmer.py b/src/HHbbVV/processors/bbVVSkimmer.py
index f4916c83..72f4f349 100644
--- a/src/HHbbVV/processors/bbVVSkimmer.py
+++ b/src/HHbbVV/processors/bbVVSkimmer.py
@@ -120,6 +120,7 @@ class bbVVSkimmer(SkimmerABC):
     jecs = hh_vars.jecs

     # only the branches necessary for templates and post processing
+    # IMPORTANT: Add Lund plane branches in hh_vars.py
     min_branches = [  # noqa: RUF012
         "ak8FatJetPhi",
         "ak8FatJetEta",
@@ -673,15 +674,16 @@ def process(self, events: ak.Array):
         # Lund plane SFs
         ################

+        lp_hist = None
+
         if isSignal and self._systematics and self._lp_sfs:
-            # (var, # columns)
             logging.info("Starting LP SFs and saving: " + str(hh_vars.lp_sf_vars))

             if len(skimmed_events["weight"]):
                 genbb = genbb[sel_all]
                 genq = genq[sel_all]

-                sf_dicts = []
+                sf_dicts, lp_hists = [], []
                 lp_num_jets = num_jets if self._save_all else 1

                 for i in range(lp_num_jets):
@@ -713,7 +715,7 @@
                     for key, (selector, gen_quarks, num_prongs) in selectors.items():
                         if np.sum(selector) > 0:
-                            selected_sfs[key] = get_lund_SFs(
+                            selected_sfs[key], lp_hist = get_lund_SFs(
                                 year,
                                 events[sel_all][selector],
                                 fatjets[sel_all][selector],
@@ -724,10 +726,13 @@
                                 ),  # giving HVV jet index if only doing LP SFs for HVV jet
                                 num_prongs,
                                 gen_quarks[selector],
+                                weights_dict["weight"][sel_all][selector],
                                 trunc_gauss=False,
                                 lnN=True,
                             )

+                            lp_hists.append(lp_hist)
+
                     sf_dict = {}

                     # collect all the scale factors, fill in 1s for unmatched jets
@@ -743,6 +748,7 @@
                     sf_dicts.append(sf_dict)

                 sf_dicts = concatenate_dicts(sf_dicts)
+                lp_hist = sum(lp_hists)

             else:
                 logging.info("No signal events selected")
@@ -790,7 +796,13 @@
         fname = events.behavior["__events_factory__"]._partition_key.replace("/", "_") + ".parquet"
         self.dump_table(pddf, fname)

-        return {year: {dataset: {"totals": totals_dict, "cutflow": cutflow}}}
+        ret_dict = {year: {dataset: {"totals": totals_dict, "cutflow": cutflow}}}
+
+        if lp_hist is not None:
+            ret_dict[year][dataset]["lp_hist"] = lp_hist
+
+        return ret_dict

     def postprocess(self, accumulator):
         return accumulator
diff --git a/src/HHbbVV/processors/corrections.py b/src/HHbbVV/processors/corrections.py
index 02d4d1c4..015e4235 100644
--- a/src/HHbbVV/processors/corrections.py
+++ b/src/HHbbVV/processors/corrections.py
@@ -16,6 +16,7 @@
 import awkward as ak
 import correctionlib
+import hist
 import numpy as np
 from coffea import util as cutil
 from coffea.analysis_tools import Weights
@@ -390,7 +391,7 @@
 # Used only for validation region right now
 def add_lepton_weights(weights: Weights, year: str, lepton: MuonArray, lepton_type: str = "muon"):
-    ul_year = get_UL_year(year)
+    # ul_year = get_UL_year(year)

     cset = correctionlib.CorrectionSet.from_file(get_pog_json(lepton_type, year))

@@ -404,9 +405,9 @@ def add_lepton_weights(weights: Weights, year: str, lepton: MuonArray, lepton_ty
     lepton_pt, lepton_eta = _get_lepton_clipped(lep_pt, lep_eta, lepton_type, corr)

     values = {}
-    values["nominal"] = cset[json_map_name].evaluate(ul_year, lepton_eta, lepton_pt, "sf")
-    values["up"] = cset[json_map_name].evaluate(ul_year, lepton_eta, lepton_pt, "systup")
-    values["down"] = cset[json_map_name].evaluate(ul_year, lepton_eta, lepton_pt, "systdown")
+    values["nominal"] = cset[json_map_name].evaluate(lepton_eta, lepton_pt, "nominal")
+    values["up"] = cset[json_map_name].evaluate(lepton_eta, lepton_pt, "systup")
+    values["down"] = cset[json_map_name].evaluate(lepton_eta, lepton_pt, "systdown")

     # add weights (for now only the nominal weight)
     weights.add(f"{lepton_type}_{corr}", values["nominal"], values["up"], values["down"])
@@ -666,16 +667,22 @@ def add_trig_effs(weights: Weights, fatjets: FatJetArray, year: str, num_jets: i
 # caching these after loading once
 (
     lp_year,
+    lp_sample,
     ratio_smeared_lookups,
     ratio_lnN_smeared_lookups,
     ratio_sys_up,
     ratio_sys_down,
+    ratio_dist_up,
+    ratio_dist_down,
     pt_extrap_lookups_dict,
     bratio,
-) = (None, None, None, None, None, None, None)
+    ratio_edges,
+) = (None, None, None, None, None, None, None, None, None, None, None)


-def _get_lund_lookups(year: str, seed: int = 42, lnN: bool = True, trunc_gauss: bool = False):
+def _get_lund_lookups(
+    year: str, seed: int = 42, lnN: bool = True, trunc_gauss: bool = False, sample: str = None
+):
     import fastjet

     dR = 0.8
@@ -730,6 +737,34 @@
     else:
         ratio_lnN_smeared_lookups = None

+    if sample is not None:
+        mc_nom = f["mc_nom"].to_numpy()[0]
+
+        with (package_path / f"corrections/lp_ratios/signals/{year}_{sample}.hist").open(
+            "rb"
+        ) as histf:
+            sig_lp_hist = pickle.load(histf)
+
+        # per-pT-bin totals (an earlier iteration normalized globally instead:
+        # mc_tot = np.sum(mc_nom); sig_tot = sig_lp_hist.sum())
+        sig_tot = np.sum(sig_lp_hist.values(), axis=(1, 2), keepdims=True)
+        mc_tot = np.sum(mc_nom, axis=(1, 2), keepdims=True)
+
+        # 0s -> 1 in the ratio
+        sig_mc_ratio = np.nan_to_num((sig_lp_hist.values() / sig_tot) / (mc_nom / mc_tot), nan=1.0)
+        sig_mc_ratio[sig_mc_ratio == 0] = 1.0
+        sig_mc_ratio = np.clip(sig_mc_ratio, 0.5, 2.0)
+
+        ratio_dist_up = dense_lookup(ratio_nom * sig_mc_ratio, ratio_edges)
+        ratio_dist_down = dense_lookup(ratio_nom / sig_mc_ratio, ratio_edges)
+    else:
+        ratio_dist_up = None
+        ratio_dist_down = None
+
     # ------- pT extrapolation setup: creates lookups for all the parameters and errors ------ #

     def _np_pad(arr: np.ndarray, target: int = MAX_PT_FPARAMS):
@@ -783,18 +818,21 @@
         ratio_lnN_smeared_lookups,
         ratio_sys_up,
         ratio_sys_down,
+        ratio_dist_up,
+        ratio_dist_down,
         pt_extrap_lookups_dict,
         bratio,
+        ratio_edges,
     )


 def _get_flat_lp_vars(lds, kt_subjets_pt):
     if len(lds) != 1:
         # flatten and save offsets to unflatten afterwards
-        if type(lds.layout) == ak._ext.ListOffsetArray64:
+        if type(lds.layout) is ak._ext.ListOffsetArray64:
             ld_offsets = lds.kt.layout.offsets
             flat_subjet_pt = ak.flatten(kt_subjets_pt)
-        elif type(lds.layout) == ak._ext.ListArray64:
+        elif type(lds.layout) is ak._ext.ListArray64:
             ld_offsets = lds.layout.toListOffsetArray64(False).offsets
             flat_subjet_pt = kt_subjets_pt
         else:
@@ -820,9 +858,6 @@
     Gets the ``num_prongs`` subjet pTs and Delta and kT per primary LP splitting of fatjets at
     ``fatjet_idx`` in each event.

-    Features are flattened (for now), and offsets are saved in ``ld_offsets`` to recover the event
-    structure.
-
     Args:
         events (NanoEventsArray): nano events
         jec_fatjets (FatJetArray): post-JEC fatjets, used to update subjet pTs.
@@ -830,7 +865,7 @@
         num_prongs (int): number of prongs / subjets per jet to reweight

     Returns:
-        flat_logD, flat_logkt, flat_subjet_pt, ld_offsets, kt_subjets_vec
+        lds, kt_subjets_vec, kt_subjets_pt: lund declusterings, subjet 4-vectors, JEC-corrected subjet pTs
     """

     # jet definitions for LP SFs
@@ -880,6 +915,27 @@
     return lds, kt_subjets_vec, kt_subjets_pt


+def _get_flat_lund_arrays(events, jec_fatjet, fatjet_idx, num_prongs):
+    """Wrapper for the _get_lund_arrays and _get_flat_lp_vars functions
+
+    returns: lds - lund declusterings,
+             kt_subjets_vec - subjet 4-vectors,
+             kt_subjets_pt - JEC-corrected subjet pTs,
+             ld_offsets - offsets for jagged structure,
+             flat_logD - flattened log(0.8/Delta),
+             flat_logkt - flattened log(kT/GeV),
+             flat_subjet_pt - flattened JEC-corrected subjet pTs
+    """
+    lds, kt_subjets_vec, kt_subjets_pt = _get_lund_arrays(
+        events, jec_fatjet, fatjet_idx, num_prongs
+    )
+
+    lds_flat = ak.flatten(lds, axis=1)
+    ld_offsets, flat_logD, flat_logkt, flat_subjet_pt = _get_flat_lp_vars(lds_flat, kt_subjets_pt)
+
+    return lds, kt_subjets_vec, kt_subjets_pt, ld_offsets, flat_logD, flat_logkt, flat_subjet_pt
+
+
 def _calc_lund_SFs(
     flat_logD: np.ndarray,
     flat_logkt: np.ndarray,
@@ -969,6 +1025,8 @@
     fatjet_idx: int | ak.Array,
     num_prongs: int,
     gen_quarks: GenParticleArray,
+    weights: np.ndarray,
+    sample: str = None,
     seed: int = 42,
     trunc_gauss: bool = False,
     lnN: bool = True,
@@ -984,6 +1042,8 @@
         jec_fatjets (FatJetArray): post-JEC fatjets, used to update subjet pTs.
         fatjet_idx (int | ak.Array): fatjet index
         num_prongs (int): number of prongs / subjets per jet to reweight
+        gen_quarks (GenParticleArray): gen quarks
+        weights (np.ndarray): event weights, for filling the LP histogram
         seed (int, optional): seed for random smearings. Defaults to 42.
         trunc_gauss (bool, optional): use truncated gaussians for smearing. Defaults to False.
         lnN (bool, optional): use log normals for smearings. Defaults to True.
@@ -995,37 +1055,64 @@
     """

     # global variable to not have to load + smear LP ratios each time
-    global ratio_smeared_lookups, ratio_lnN_smeared_lookups, ratio_sys_up, ratio_sys_down, pt_extrap_lookups_dict, bratio, lp_year  # noqa: PLW0603
+    global ratio_smeared_lookups, ratio_lnN_smeared_lookups, ratio_sys_up, ratio_sys_down, ratio_dist_up, ratio_dist_down, pt_extrap_lookups_dict, bratio, ratio_edges, lp_year, lp_sample  # noqa: PLW0603

     if (
         (lnN and ratio_lnN_smeared_lookups is None)
         or (trunc_gauss and ratio_smeared_lookups is None)
         or (lp_year != year)  # redo if different year (can change to cache every year if needed)
+        or (lp_sample != sample)  # redo if different sample (can change to cache if needed)
     ):
         (
             ratio_smeared_lookups,
             ratio_lnN_smeared_lookups,
             ratio_sys_up,
             ratio_sys_down,
+            ratio_dist_up,
+            ratio_dist_down,
             pt_extrap_lookups_dict,
             bratio,
-        ) = _get_lund_lookups(year, seed, lnN, trunc_gauss)
+            ratio_edges,
+        ) = _get_lund_lookups(year, seed, lnN, trunc_gauss, sample)
         lp_year = year
+        lp_sample = sample

     ratio_nominal = ratio_lnN_smeared_lookups[0] if lnN else ratio_smeared_lookups[0]

     jec_fatjet = jec_fatjets[np.arange(len(jec_fatjets)), fatjet_idx]

-    lds, kt_subjets_vec, kt_subjets_pt = _get_lund_arrays(
-        events, jec_fatjet, fatjet_idx, num_prongs
+    # get lund plane declusterings, subjets, and flattened LP vars
+    lds, kt_subjets_vec, kt_subjets_pt, ld_offsets, flat_logD, flat_logkt, flat_subjet_pt = (
+        _get_flat_lund_arrays(events, jec_fatjet, fatjet_idx, num_prongs)
     )

-    lds_flat = ak.flatten(lds, axis=1)
-    ld_offsets, flat_logD, flat_logkt, flat_subjet_pt = _get_flat_lp_vars(lds_flat, kt_subjets_pt)
+    ################################################################################################
+    # ---- Fill LP histogram for signal for distortion uncertainty ---- #
+    ################################################################################################

-    sfs = {}
+    lp_hist = hist.Hist(
+        hist.axis.Variable(ratio_edges[0], name="subjet_pt", label="Subjet pT [GeV]"),
+        hist.axis.Variable(ratio_edges[1], name="logD", label="ln(0.8/Delta)"),
+        hist.axis.Variable(ratio_edges[2], name="logkt", label="ln(kT/GeV)"),
+    )
+
+    # repeat weights for each LP splitting
+    flat_weights = np.repeat(
+        np.repeat(weights, num_prongs), ak.count(ak.flatten(lds.kt, axis=1), axis=1)
+    )
+
+    lp_hist.fill(
+        subjet_pt=flat_subjet_pt,
+        logD=flat_logD,
+        logkt=flat_logkt,
+        weight=flat_weights,
+    )
+
+    ################################################################################################
+    # ---- get scale factors per jet + smearings for stat unc. + syst. variations + pt extrap unc. ---- #
+    ################################################################################################
+
+    sfs = {}

     if trunc_gauss:
         sfs["lp_sf"] = _calc_lund_SFs(
@@ -1069,6 +1156,28 @@
             [pt_extrap_lookups_dict["sys_up_params"]],
         )

+    if ratio_dist_up is not None:
+        sfs["lp_sf_dist_down"] = _calc_lund_SFs(
+            flat_logD,
+            flat_logkt,
+            flat_subjet_pt,
+            ld_offsets,
+            num_prongs,
+            [ratio_dist_down],
+            [pt_extrap_lookups_dict["params"]],
+        )
+
+        sfs["lp_sf_dist_up"] = _calc_lund_SFs(
+            flat_logD,
+            flat_logkt,
+            flat_subjet_pt,
+            ld_offsets,
+            num_prongs,
+            [ratio_dist_up],
+            [pt_extrap_lookups_dict["params"]],
+        )
+
     sfs["lp_sf_pt_extrap_vars"] = _calc_lund_SFs(
         flat_logD,
         flat_logkt,
@@ -1079,7 +1188,34 @@
         pt_extrap_lookups_dict["smeared_params"],
     )

+    ################################################################################################
+    # ---- get scale factors after re-clustering with +/- one prong, for subjet matching uncs. ---- #
+    ################################################################################################
+
+    # need to save these for the unclustered prongs uncertainty
+    np_kt_subjets_vecs = []
+
+    for 
np_kt_subjets_vec in np_kt_subjets_vecs: + sj_matched = [] + + # get dR between gen quarks and subjets + for i in range(num_prongs): + sj_q_dr = np_kt_subjets_vec.delta_r(gen_quarks[:, i]) + # is quark matched to a subjet (dR < 0.2) + sj_matched.append(ak.min(sj_q_dr, axis=1) <= matching_dR) + + sj_matched = np.array(sj_matched).T + unmatched_quarks.append(~sj_matched) + + # quarks which are not matched in any of the reclusterings + unmatched_quarks = np.prod(unmatched_quarks, axis=0) + sfs["lp_sf_rc_unmatched_quarks"] = np.sum(unmatched_quarks, axis=1, keepdims=True) + + return sfs, lp_hist diff --git a/src/HHbbVV/scale_factors/top_reweighting.ipynb b/src/HHbbVV/scale_factors/top_reweighting.ipynb index fcff82dd..8de28262 100644 --- a/src/HHbbVV/scale_factors/top_reweighting.ipynb +++ b/src/HHbbVV/scale_factors/top_reweighting.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -36,14 +36,12 @@ "import os\n", "\n", "from HHbbVV.processors import corrections\n", - "import correctionlib\n", - "\n", - "import utils" + "import correctionlib" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -53,48 +51,19 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "plot_dir = \"../../../plots/ScaleFactors/24Feb26\"\n", + "plot_dir = \"../../../plots/ScaleFactors/24Jul22\"\n", "_ = os.system(f\"mkdir -p {plot_dir}\")" ] }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/uscms/home/rkansal/.local/lib/python3.9/site-packages/coffea/nanoevents/mapping/uproot.py:86: UserWarning: Found duplicate branch FatJetAK15SubJet_nBHadrons in , taking first instance\n", - " warnings.warn(\n", - "/uscms/home/rkansal/.local/lib/python3.9/site-packages/coffea/nanoevents/mapping/uproot.py:86: UserWarning: Found duplicate branch FatJetAK15SubJet_nCHadrons in , taking first instance\n", - " warnings.warn(\n", - "/uscms/home/rkansal/.local/lib/python3.9/site-packages/coffea/nanoevents/mapping/uproot.py:86: UserWarning: Found duplicate branch FatJetAK15_nBHadrons in , taking first instance\n", - " warnings.warn(\n", - "/uscms/home/rkansal/.local/lib/python3.9/site-packages/coffea/nanoevents/mapping/uproot.py:86: UserWarning: Found duplicate branch FatJetAK15_nCHadrons in , taking first instance\n", - " warnings.warn(\n", - "/uscms/home/rkansal/.local/lib/python3.9/site-packages/coffea/nanoevents/mapping/uproot.py:86: UserWarning: Found duplicate branch FatJet_btagDDBvLV2 in , taking first instance\n", - " warnings.warn(\n", - "/uscms/home/rkansal/.local/lib/python3.9/site-packages/coffea/nanoevents/mapping/uproot.py:86: UserWarning: Found duplicate branch FatJet_btagDDCvBV2 in , taking first instance\n", - " warnings.warn(\n", - "/uscms/home/rkansal/.local/lib/python3.9/site-packages/coffea/nanoevents/mapping/uproot.py:86: UserWarning: Found duplicate branch FatJet_btagDDCvLV2 in , taking first instance\n", - " warnings.warn(\n", - "/uscms/home/rkansal/.local/lib/python3.9/site-packages/coffea/nanoevents/mapping/uproot.py:86: UserWarning: Found duplicate branch FatJet_nBHadrons in , taking first instance\n", - " warnings.warn(\n", - "/uscms/home/rkansal/.local/lib/python3.9/site-packages/coffea/nanoevents/mapping/uproot.py:86: UserWarning: Found duplicate branch FatJet_nCHadrons in , 
taking first instance\n", - " warnings.warn(\n", - "/uscms/home/rkansal/.local/lib/python3.9/site-packages/coffea/nanoevents/mapping/uproot.py:86: UserWarning: Found duplicate branch SubJet_nBHadrons in , taking first instance\n", - " warnings.warn(\n", - "/uscms/home/rkansal/.local/lib/python3.9/site-packages/coffea/nanoevents/mapping/uproot.py:86: UserWarning: Found duplicate branch SubJet_nCHadrons in , taking first instance\n", - " warnings.warn(\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "events = nanoevents.NanoEventsFactory.from_root(\n", " # \"/eos/uscms/store/user/lpcpfnano/cmantill/v2_3/2017/HH_gen/GluGluToHHTobbVV_node_cHHH1_TuneCP5_13TeV-powheg-pythia8/GluGluToHHTobbVV_node_cHHH1/221017_221918/0000/nano_mc2017_100.root\",\n", @@ -118,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -163,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -181,7 +150,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -227,7 +196,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -259,7 +228,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -283,7 +252,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -337,7 +306,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -348,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -365,7 +334,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -529,7 +498,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -554,7 +523,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -569,7 +538,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -592,7 +561,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -635,7 +604,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -680,7 +649,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -689,27 +658,7 @@ }, { "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "leading_fatjets" - ] - }, - { - "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -728,7 +677,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -743,7 +692,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, 
"metadata": {}, "outputs": [], "source": [ @@ -764,103 +713,57 @@ }, { "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "45000" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(events)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "60" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "np.sum(top_match_dict[\"top_matched\"])" + "print(len(events))\n", + "print(np.sum(top_match_dict[\"top_matched\"]))\n", + "print(np.sum(top_match_dict[\"w_matched\"]))\n", + "print(np.sum(top_match_dict[\"unmatched\"]))" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "180" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "np.sum(top_match_dict[\"w_matched\"])" + "merged_top_events = presel_events[merged_top_jet_match]\n", + "had_top_jets = merged_top_events.FatJet[:, 0]\n", + "\n", + "merged_ak8_pfcands = merged_top_events.FatJetPFCands\n", + "merged_ak8_pfcands = merged_ak8_pfcands[\n", + " merged_ak8_pfcands.jetIdx == fatjet_idx[merged_top_jet_match]\n", + "]\n", + "merged_pfcands = merged_top_events.PFCands[merged_ak8_pfcands.pFCandsIdx]" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "855" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "np.sum(top_match_dict[\"unmatched\"])" + "merged_pfcands_vector_ptetaphi = ak.Array(\n", + " [\n", + " [{kin_key: cand[kin_key] for kin_key in skim_vars} for cand in event_cands]\n", + " for event_cands in merged_pfcands\n", + " ],\n", + " with_name=\"PtEtaPhiMLorentzVector\",\n", + ")" ] }, { - "cell_type": "code", - "execution_count": 23, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "merged_top_events = presel_events[merged_top_jet_match]\n", - "had_top_jets = merged_top_events.FatJet[:, 0]\n", - "\n", - "merged_ak8_pfcands = merged_top_events.FatJetPFCands\n", - "merged_ak8_pfcands = merged_ak8_pfcands[\n", - " merged_ak8_pfcands.jetIdx == fatjet_idx[merged_top_jet_match]\n", - "]\n", - "merged_pfcands = merged_top_events.PFCands[merged_ak8_pfcands.pFCandsIdx]" + "JEC Correction" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -887,22 +790,7 @@ }, { "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "merged_pfcands_vector_ptetaphi = ak.Array(\n", - " [\n", - " [{kin_key: cand[kin_key] for kin_key in skim_vars} for cand in event_cands]\n", - " for event_cands in merged_pfcands\n", - " ],\n", - " with_name=\"PtEtaPhiMLorentzVector\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -916,36 +804,9 @@ }, { "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "#--------------------------------------------------------------------------\n", - "# FastJet release 3.4.0\n", - "# M. 
Cacciari, G.P. Salam and G. Soyez \n", - "# A software package for jet finding and analysis at colliders \n", - "# http://fastjet.fr \n", - "#\t \n", - "# Please cite EPJC72(2012)1896 [arXiv:1111.6097] if you use this package\n", - "# for scientific work and optionally PLB641(2006)57 [hep-ph/0512210]. \n", - "# \n", - "# FastJet is provided without warranty under the GNU GPL v2 or higher. \n", - "# It uses T. Chan's closest pair algorithm, S. Fortune's Voronoi code,\n", - "# CGAL and 3rd party plugin jet algorithms. See COPYING file for details.\n", - "#--------------------------------------------------------------------------\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "UserWarning: dcut and exclusive jets for jet-finders other than kt, C/A or genkt with p>=0 should be interpreted with care.\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# cluster first with kT\n", "kt_clustering = fastjet.ClusterSequence(merged_pfcands_vector_ptetaphi, ktdef)\n", @@ -964,7 +825,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -983,7 +844,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -998,20 +859,9 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "closest_sjidx = np.argmin(subjet_bs_dr, axis=1).to_numpy()\n", "kt_subjets_pt[np.arange(len(kt_subjets_pt)), closest_sjidx]" @@ -1019,7 +869,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1035,40 +885,18 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "kt_subjets_pt" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "kt_subjets_pt[np.arange(len(kt_subjets_pt)), closest_sjidx]" ] @@ -1165,44 +993,6 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "f = uproot.open(\"../corrections/lp_ratios/ratio_2017.root\")\n", - "f.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "f[\"ratio_sys_tot_down\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "f[\"pt_extrap\"][\"func_16_12\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fold = uproot.open(\"../corrections/lp_ratios/old/ratio_june9.root\")\n", - "fold.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], "source": [ "f = uproot.open(\"../corrections/lp_ratios/ratio_2017.root\")\n", "# f = uproot.open(\"../corrections/ratio_june9.root\")\n", @@ -1218,211 +1008,9 @@ "ratio_nom_errs[zero_vals] = 0\n", "\n", "ratio_shape = f[\"ratio_nom\"].values().shape\n", - 
"bl_ratio = f[\"h_bl_ratio\"].to_numpy()[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[[0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " ...,\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ]],\n", - "\n", - " [[0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " ...,\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ]],\n", - "\n", - " [[0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " ...,\n", - " [2. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ]],\n", - "\n", - " [[0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " ...,\n", - " [0. , 1.840388 , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ]],\n", - "\n", - " [[0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " ...,\n", - " [1.4767264 , 1.3192567 , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [1.0331354 , 1.4990366 , 1.9544313 , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ]],\n", - "\n", - " [[0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " ...,\n", - " [0. , 0.53056574, 2. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0.42190078, 0. , ..., 0. ,\n", - " 0. , 0. ],\n", - " [0. , 0. , 0. , ..., 0. ,\n", - " 0. , 0. ]]], dtype=float32)" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bl_ratio" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[[1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " ...,\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ]],\n", - "\n", - " [[1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " ...,\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ]],\n", - "\n", - " [[1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " ...,\n", - " [0.81918097, 1.6875561 , 0.16781662, ..., 1. ,\n", - " 1. , 1. ],\n", - " [1.6282549 , 3.4878435 , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. 
, ..., 1. ,\n", - " 1. , 1. ]],\n", - "\n", - " [[1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " ...,\n", - " [0.20374994, 1. , 1.4421521 , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 0.6945209 , 2.0556872 , ..., 1. ,\n", - " 1. , 1. ],\n", - " [6. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ]],\n", - "\n", - " [[1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " ...,\n", - " [0.2418148 , 1.459049 , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1.5244532 , 0.805354 , 1.6780305 , ..., 1. ,\n", - " 1. , 1. ],\n", - " [4. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ]],\n", - "\n", - " [[1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " ...,\n", - " [1. , 1. , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [0.11765861, 2.4962053 , 1. , ..., 1. ,\n", - " 1. , 1. ],\n", - " [1. , 2. , 1. , ..., 1. ,\n", - " 1. , 1. ]]], dtype=float32)" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ratio_nom" + "bl_ratio = f[\"h_bl_ratio\"].to_numpy()[0]\n", + "\n", + "n_LP_sf_toys = 100" ] }, { @@ -1438,16 +1026,6 @@ "_ = plt.colorbar()" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# max_pt_bin = ratio_nom_edges[0][-2]\n", - "n_LP_sf_toys = 100" - ] - }, { "cell_type": "code", "execution_count": null, @@ -1455,27 +1033,16 @@ "outputs": [], "source": [ "plt.figure(figsize=(10, 6))\n", - "plt.imshow(ratio_nom[5].T[::-1], extent=[-1, 8, -5, 6.9], cmap=\"turbo\")\n", + "plt.imshow(\n", + " ratio_nom[5].T[::-1],\n", + " extent=[-ratio_edges[1][0], ratio_edges[1][-1], ratio_edges[2][0], ratio_edges[2][-1]],\n", + " cmap=\"turbo\",\n", + ")\n", "plt.xlabel(r\"ln$(0.8/\\Delta)$\")\n", "plt.ylabel(r\"ln$(k_T/GeV)$\")\n", "_ = plt.colorbar()" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# maxp = 0\n", - "# for i in range(ratio_shape[1]):\n", - "# for j in range(ratio_shape[2]):\n", - "# if len(pt_extrap_params[i][j]) > maxp:\n", - "# maxp = len(pt_extrap_params[i][j])\n", - "\n", - "# maxp" - ] - }, { "cell_type": "code", "execution_count": null, @@ -1545,9 +1112,7 @@ "\n", "# produces array of shape ``[n_sf_toys, subjet_pt bins, ln(0.8/Delta) bins, ln(kT/GeV) bins]``\n", "ratio_nom_smeared = ratio_nom + (ratio_nom_errs * rand_noise)\n", - "ratio_smeared_lookups = [\n", - " dense_lookup(ratio_nom_smeared[i], ratio_nom_edges) for i in range(n_sf_toys)\n", - "]" + "ratio_smeared_lookups = [dense_lookup(ratio_nom_smeared[i], ratio_edges) for i in range(n_sf_toys)]" ] }, { @@ -1556,12 +1121,148 @@ "metadata": {}, "outputs": [], "source": [ - "# save offsets to recover awkward structure later\n", - "ld_offsets = lds.kt.layout.offsets\n", - "flat_logD = np.log(0.8 / ak.flatten(lds).Delta).to_numpy()\n", - "flat_logkt = np.log(ak.flatten(lds).kt).to_numpy()\n", - "# repeat subjet pt for each lund declustering\n", - "flat_subjet_pt = np.repeat(ak.flatten(kt_subjets_pt), ak.count(lds.kt, axis=1)).to_numpy()" + "def _get_flat_lp_vars(lds, kt_subjets_pt):\n", + " if len(lds) != 1:\n", + " # flatten and save offsets to unflatten afterwards\n", + " if type(lds.layout) == ak._ext.ListOffsetArray64:\n", + " ld_offsets = 
lds.kt.layout.offsets\n", + " flat_subjet_pt = ak.flatten(kt_subjets_pt)\n", + " elif type(lds.layout) == ak._ext.ListArray64:\n", + " ld_offsets = lds.layout.toListOffsetArray64(False).offsets\n", + " flat_subjet_pt = kt_subjets_pt\n", + " else:\n", + " # edge case of single subjet...\n", + " ld_offsets = [0]\n", + " flat_subjet_pt = kt_subjets_pt\n", + "\n", + " # repeat subjet pt for each lund declustering\n", + " flat_subjet_pt = np.repeat(flat_subjet_pt, ak.count(lds.kt, axis=1)).to_numpy()\n", + " flat_logD = np.log(0.8 / ak.flatten(lds).Delta).to_numpy()\n", + " flat_logkt = np.log(ak.flatten(lds).kt).to_numpy()\n", + "\n", + " return ld_offsets, flat_logD, flat_logkt, flat_subjet_pt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lds_flat = ak.flatten(lds, axis=1)\n", + "ld_offsets, flat_logD, flat_logkt, flat_subjet_pt = _get_flat_lp_vars(lds_flat, kt_subjets_pt)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fill LP histogram for signal" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import hist\n", + "\n", + "lp_hist = hist.Hist(\n", + " hist.axis.Variable(ratio_edges[0], name=\"subjet_pt\", label=\"Subjet pT [GeV]\"),\n", + " hist.axis.Variable(ratio_edges[1], name=\"logD\", label=\"ln(0.8/Delta)\"),\n", + " hist.axis.Variable(ratio_edges[2], name=\"logkt\", label=\"ln(kT/GeV)\"),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "weight = merged_top_events.genWeight.to_numpy()\n", + "flat_weight = np.repeat(np.repeat(weight, num_prongs), ak.count(lds_flat.kt, axis=1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "type(lds.layout) is ak._ext.ListArray64" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lp_hist" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sum([lp_hist, []])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lp_hist.fill(\n", + " subjet_pt=flat_subjet_pt,\n", + " logD=flat_logD,\n", + " logkt=flat_logkt,\n", + " weight=flat_weight,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lp_hist[0, ...]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Testing summing over histograms\n", + "from copy import deepcopy\n", + "\n", + "lp_hist2 = deepcopy(lp_hist)\n", + "lp_hist2.values()[:] = 1\n", + "lp_hist2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sum([lp_hist, lp_hist2])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "pT Extrapolation" ] }, { @@ -1570,6 +1271,7 @@ "metadata": {}, "outputs": [], "source": [ + "max_pt_bin = ratio_edges[0][-2]\n", "high_pt_sel = flat_subjet_pt > max_pt_bin\n", "hpt_logD = flat_logD[high_pt_sel]\n", "hpt_logkt = flat_logkt[high_pt_sel]\n", @@ -1585,7 +1287,7 @@ "metadata": {}, "outputs": [], "source": [ - "clip_max, clip_min = 10, 0.1\n", + "clip_max, clip_min = 5.0, 1 / 5.0\n", "pt_lookup = pt_extrap_lookups_dict[\"params\"]\n", "params = pt_lookup(hpt_logD, hpt_logkt)\n", "pt_extrap_vals = 
np.maximum(np.minimum(np.sum(params * sj_pt_orders, axis=1), clip_max), clip_min)" @@ -1803,7 +1505,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.9.15" }, "orig_nbformat": 4, "vscode": { diff --git a/src/condor/check_jobs.py b/src/condor/check_jobs.py index 74f6a0e4..bd43cff0 100644 --- a/src/condor/check_jobs.py +++ b/src/condor/check_jobs.py @@ -20,6 +20,7 @@ run_utils.parse_common_args(parser) parser.add_argument("--user", default="rkansal", help="user", type=str) +parser.add_argument("--site", default="lpc", help="t2 site", choices=["lpc", "ucsd"], type=str) run_utils.add_bool_arg(parser, "submit-missing", default=False, help="submit missing files") run_utils.add_bool_arg( parser, @@ -32,8 +33,12 @@ trigger_processor = args.processor.startswith("trigger") -eosdir = f"/eos/uscms/store/user/{args.user}/bbVV/{args.processor}/{args.tag}/{args.year}/" -user_condor_dir = f"/uscms/home/{args.user}/nobackup/HHbbVV/condor/" +if args.site == "lpc": + eosdir = f"/eos/uscms/store/user/{args.user}/bbVV/{args.processor}/{args.tag}/{args.year}/" + user_condor_dir = f"/uscms/home/{args.user}/nobackup/HHbbVV/condor/" +elif args.site == "ucsd": + eosdir = f"/ceph/cms/store/user/{args.user}/bbVV/{args.processor}/{args.tag}/{args.year}/" + user_condor_dir = f"/home/users/{args.user}/HHbbVV/condor/" samples = listdir(eosdir) jdls = [ @@ -62,7 +67,7 @@ running_jobs = [] if args.check_running: - os.system(f"condor_q {args.user}" "| awk '{print $9}' > running_jobs.txt") + os.system(f"condor_q {args.user} -nobatch" "| awk '{print $9}' > running_jobs.txt") with Path("running_jobs.txt").open() as f: lines = f.readlines() diff --git a/src/condor/submit.py b/src/condor/submit.py index 5eda9b87..a9e2662d 100755 --- a/src/condor/submit.py +++ b/src/condor/submit.py @@ -30,6 +30,7 @@ def get_site_vars(site): t2_local_prefix = Path("/ceph/cms/") t2_prefix = "root://redirector.t2.ucsd.edu:1095" if username == "rkansal": + # Reminder: need to re-copy this from /tmp whenever it expires (symlink?) proxy = "/home/users/rkansal/x509up_u31735" elif username == "annava": proxy = "/home/users/annava/projects/HHbbVV/test" diff --git a/src/condor/submit.templ.sh b/src/condor/submit.templ.sh index 78a99bbf..2050b35e 100644 --- a/src/condor/submit.templ.sh +++ b/src/condor/submit.templ.sh @@ -24,11 +24,11 @@ commithash=$$(git rev-parse HEAD) echo "https://github.com/$gituser/HHbbVV/commit/$${commithash}" > commithash.txt xrdcp -f commithash.txt $eosoutgithash -pip install -e . +pip3 install -e . 
# run code # pip install --user onnxruntime -python -u -W ignore $script --year $year --starti $starti --endi $endi --samples $sample --subsamples $subsample --processor $processor --maxchunks $maxchunks --chunksize $chunksize --label $label --njets $njets ${save_ak15} ${save_systematics} ${inference} ${save_all} ${lp_sfs} +python3 -u -W ignore $script --year $year --starti $starti --endi $endi --samples $sample --subsamples $subsample --processor $processor --maxchunks $maxchunks --chunksize $chunksize --label $label --njets $njets ${save_ak15} ${save_systematics} ${inference} ${save_all} ${lp_sfs} #move output to eos xrdcp -f outfiles/* $eosoutpkl diff --git a/src/condor/submit_configs/ttsfs_24_07_22_sig.yaml b/src/condor/submit_configs/ttsfs_24_07_22_sig.yaml new file mode 100644 index 00000000..c61cb660 --- /dev/null +++ b/src/condor/submit_configs/ttsfs_24_07_22_sig.yaml @@ -0,0 +1,18 @@ +{ + "TTbarScaleFactors": + { + "TTbar": { "files_per_job": 20, "subsamples": ["TTToSemiLeptonic"] }, + "SingleTop": + { + "files_per_job": 20, + "subsamples": + [ + "ST_tW_antitop_5f_NoFullyHadronicDecays", + "ST_tW_top_5f_NoFullyHadronicDecays", + "ST_s-channel_4f_leptonDecays", + "ST_t-channel_antitop_4f_InclusiveDecays", + "ST_t-channel_top_4f_InclusiveDecays", + ], + }, + }, +}
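
The new `lp_sf_rc_unmatched_quarks` output above counts, per event, the gen quarks that fail the deltaR subjet match in every alternative reclustering, so only consistently unmatched quarks feed the systematic. Below is a minimal NumPy sketch of that matching logic; the event kinematics, the single toy reclustering, and the `delta_r` helper are invented stand-ins for the awkward/vector objects the processor actually uses.

import numpy as np

def delta_r(eta1, phi1, eta2, phi2):
    # standard deltaR with dphi wrapped into [-pi, pi]
    dphi = (phi1 - phi2 + np.pi) % (2 * np.pi) - np.pi
    return np.sqrt((eta1 - eta2) ** 2 + dphi**2)

matching_dR = 0.2
num_prongs = 3

# toy gen quarks: shape (events, prongs)
gen_eta = np.array([[0.1, 0.5, -0.3], [1.0, 1.2, 0.9]])
gen_phi = np.array([[0.0, 0.4, 2.0], [-1.0, -0.8, 3.0]])

# one (eta, phi) subjet set per reclustering: shape (events, subjets)
reclusterings = [
    (
        np.array([[0.12, 0.52, 2.5], [1.05, 1.15, -2.0]]),
        np.array([[0.02, 0.41, -1.0], [-0.98, -0.82, 0.5]]),
    ),
]

unmatched_quarks = []
for sj_eta, sj_phi in reclusterings:
    sj_matched = []
    for i in range(num_prongs):
        # min dR between quark i and any subjet in this reclustering
        sj_q_dr = delta_r(sj_eta, sj_phi, gen_eta[:, i : i + 1], gen_phi[:, i : i + 1])
        sj_matched.append(sj_q_dr.min(axis=1) <= matching_dR)
    unmatched_quarks.append(~np.array(sj_matched).T)

# a quark counts as unmatched only if it is unmatched in all reclusterings
unmatched_quarks = np.prod(unmatched_quarks, axis=0)
print(np.sum(unmatched_quarks, axis=1, keepdims=True))  # -> [[1], [1]]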
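
The `ratio_smeared_lookups` fix above points the toy lookups at `ratio_edges` rather than the stale `ratio_nom_edges` name; the smearing itself is unchanged: each of the `n_LP_sf_toys` toys fluctuates every ratio bin by one Gaussian draw scaled by that bin's error. A self-contained sketch of that step, with invented shapes and errors (the notebook then wraps each toy in a coffea `dense_lookup`):

import numpy as np

rng = np.random.default_rng(42)
n_sf_toys = 100

# stand-ins for f["ratio_nom"].values() and its per-bin errors;
# axes are (subjet pT, ln(0.8/Delta), ln(kT/GeV)) bins
ratio_nom = rng.uniform(0.5, 1.5, size=(6, 20, 20))
ratio_nom_errs = 0.1 * np.ones_like(ratio_nom)

# one Gaussian fluctuation per toy and per bin
rand_noise = rng.normal(size=(n_sf_toys, *ratio_nom.shape))

# shape [n_sf_toys, subjet_pt bins, ln(0.8/Delta) bins, ln(kT/GeV) bins]
ratio_nom_smeared = ratio_nom + ratio_nom_errs * rand_noise
print(ratio_nom_smeared.shape)  # (100, 6, 20, 20)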
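
The new `_get_flat_lp_vars` helper flattens the jagged per-subjet declusterings so the lookups run on plain 1D arrays, keeping the list offsets to restore the jagged structure afterwards; it branches on the awkward layout type because differently constructed arrays (`ListArray64` vs `ListOffsetArray64`) expose their offsets differently. The offsets bookkeeping in miniature, with toy values in place of the Lund declusterings:

import awkward as ak
import numpy as np

# jagged toy declusterings: one list of kT splittings per subjet
kt = ak.Array([[1.0, 2.0], [3.0], [4.0, 5.0, 6.0]])
kt_subjets_pt = np.array([300.0, 450.0, 700.0])

counts = np.asarray(ak.count(kt, axis=1))  # splittings per subjet
flat_logkt = np.log(ak.flatten(kt).to_numpy())
# one subjet pT per splitting, as in the helper
flat_subjet_pt = np.repeat(kt_subjets_pt, counts)

# after the flat lookups, recover the jagged structure from the counts
restored = ak.unflatten(flat_logkt, counts)
print(restored.tolist())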
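
The "Fill LP histogram for signal" cells build a 3D `hist.Hist` on the same binning as the data/MC ratio and fill it once per splitting, with each splitting weighted by its event's genWeight (repeated num_prongs x splitting-count times). A runnable miniature; the edges and fill values here are invented, standing in for `ratio_edges` and the flattened declustering arrays:

import hist
import numpy as np

ratio_edges = [
    np.array([300.0, 400.0, 500.0, 700.0, 1000.0]),  # subjet pT [GeV]
    np.linspace(-1.0, 8.0, 19),  # ln(0.8/Delta)
    np.linspace(-5.0, 7.0, 25),  # ln(kT/GeV)
]

lp_hist = hist.Hist(
    hist.axis.Variable(ratio_edges[0], name="subjet_pt", label="Subjet pT [GeV]"),
    hist.axis.Variable(ratio_edges[1], name="logD", label="ln(0.8/Delta)"),
    hist.axis.Variable(ratio_edges[2], name="logkt", label="ln(kT/GeV)"),
)

rng = np.random.default_rng(0)
n = 1000
lp_hist.fill(
    subjet_pt=rng.uniform(300, 1000, n),
    logD=rng.uniform(-1, 8, n),
    logkt=rng.uniform(-5, 7, n),
    weight=np.ones(n),
)
print(lp_hist.sum())  # 1000.0; the notebook also checks combining such hists with sum([...])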
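
Last, the pT-extrapolation cell tightens the clipping of the extrapolated ratio values from [0.1, 10] to [1/5, 5]. A sketch of the evaluate-and-clip step; the coefficients and the `[1, pT]` basis in `sj_pt_orders` are invented stand-ins for the `pt_extrap` fit parameters returned by the lookup:

import numpy as np

clip_max, clip_min = 5.0, 1 / 5.0  # tightened from 10, 0.1 in this PR

# toy per-subjet fit coefficients and high-pT subjet values
params = np.array([[0.9, 2.0e-4], [1.1, -1.5e-4]])
sj_pt = np.array([600.0, 800.0])
sj_pt_orders = np.stack([np.ones_like(sj_pt), sj_pt], axis=1)  # assumed polynomial basis

# evaluate the fit per subjet, then clip (same as np.clip(vals, clip_min, clip_max))
pt_extrap_vals = np.maximum(np.minimum(np.sum(params * sj_pt_orders, axis=1), clip_max), clip_min)
print(pt_extrap_vals)  # [1.02 0.98]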