diff --git a/README.md b/README.md index ebdc9ae5..9dcb98ed 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,7 @@ Search for two boosted (high transverse momentum) Higgs bosons (H) decaying to t - [BDT Pre-Processing](#bdt-pre-processing) - [BDT Trainings](#bdt-trainings) - [Post-Processing](#post-processing-1) + - [Control plots with resonant and nonresonant samples](#control-plots-with-resonant-and-nonresonant-samples) - [Making separate background and signal templates for scan (resonant)](#making-separate-background-and-signal-templates-for-scan-resonant) - [Create Datacard](#create-datacard) - [PlotFits](#plotfits) @@ -41,6 +42,7 @@ Search for two boosted (high transverse momentum) Higgs bosons (H) decaying to t - [Signal injection tests](#signal-injection-tests) - [Misc](#misc) - [Command for copying directories to PRP in background](#command-for-copying-directories-to-prp-in-background) + - [Command for copying res samples to my laptop](#command-for-copying-res-samples-to-my-laptop) - [Get all running condor job names:](#get-all-running-condor-job-names) - [Crab data jobs recovery](#crab-data-jobs-recovery) @@ -270,6 +272,11 @@ Scan (non-resonant): for year in 2016 2016APV 2017 2018; do python -u postprocessing.py --templates --year $year --template-dir "templates/$TAG/" --data-dir "../../../../data/skimmer/Feb24/" --old-processor --nonres-txbb-wp "LP" "MP" "HP" --nonres-bdt-wp 0.995 0.998 0.999 --no-do-jshifts; done ``` +#### Control plots with resonant and nonresonant samples + +Run `postprocessing/bash_scripts/ControlPlots.sh` from inside the `postprocessing` folder. + + #### Making separate background and signal templates for scan (resonant) Background and data: @@ -284,6 +291,7 @@ Signal: nohup bash -c 'for sample in NMSSM_XToYHTo2W2BTo4Q2B_MX-4000_MY-150 NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250; do for year in 2016APV 2016 2017 2018; do python -u postprocessing.py --templates --year $year --template-dir "/eos/uscms/store/user/rkansal/bbVV/templates/$TAG/" --data-dir "/eos/uscms/store/user/rkansal/bbVV/skimmer/Feb24" --signal-data-dir "/eos/uscms/store/user/rkansal/bbVV/skimmer/Apr11" --resonant --sig-samples $sample --bg-keys "" --res-txbb-wp LP MP HP --res-thww-wp 0.4 0.6 0.8 0.9 0.94 0.96 0.98 --no-do-jshifts --templates-name $sample --no-data; done; done' &> outs/sigout.txt & ``` + ### Create Datacard Need `root==6.22.6`, and `square_coef` branch of https://github.com/rkansal47/rhalphalib installed (`pip install -e . --user` after checking out the branch). `CMSSW_11_2_0` recommended.
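+
+A minimal sketch of that setup (the clone location is illustrative; the branch and install command are as stated above):
+
+```bash
+# clone rhalphalib and switch to the square_coef branch
+git clone https://github.com/rkansal47/rhalphalib
+cd rhalphalib
+git checkout square_coef
+# editable user install, as noted above
+pip install -e . --user
+```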
@@ -417,6 +425,12 @@ mkdir ../copy_logs for i in *; do echo $i && sleep 3 && (nohup sh -c "krsync -av --progress --stats $i/root/ $HWWTAGGERDEP_POD:/hwwtaggervol/training/$FOLDER/$i" &> ../copy_logs/$i.txt &) done ``` +### Command for copying res samples to my laptop + +```bash +for sample in 'NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80' 'NMSSM_XToYHTo2W2BTo4Q2B_MX-1200_MY-190' 'NMSSM_XToYHTo2W2BTo4Q2B_MX-2000_MY-125' 'NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250' 'NMSSM_XToYHTo2W2BTo4Q2B_MX-4000_MY-150'; do for year in 2016APV 2016 2017 2018; do rsync -avP rkansal@cmslpc-sl7.fnal.gov:eos/bbVV/skimmer/Apr11/$year/$sample data/skimmer/Apr11/$year/; done; done +``` + ### Get all running condor job names: diff --git a/src/HHbbVV/VBF_binder/VBFVectorTesting.ipynb b/src/HHbbVV/VBF_binder/VBFVectorTesting.ipynb index 78d8a496..25fb0fea 100644 --- a/src/HHbbVV/VBF_binder/VBFVectorTesting.ipynb +++ b/src/HHbbVV/VBF_binder/VBFVectorTesting.ipynb @@ -55,36 +55,44 @@ "\n", "# Generating dummy data for vbf1\n", "np.random.seed(42)\n", - "vbf1 = vector.array({\n", - " \"pt\": np.random.rand(5),\n", - " \"phi\": np.random.rand(5),\n", - " \"eta\": np.random.rand(5),\n", - " \"M\": np.random.rand(5)\n", - "})\n", + "vbf1 = vector.array(\n", + " {\n", + " \"pt\": np.random.rand(5),\n", + " \"phi\": np.random.rand(5),\n", + " \"eta\": np.random.rand(5),\n", + " \"M\": np.random.rand(5),\n", + " }\n", + ")\n", "\n", "# Generating dummy data for vbf2\n", - "vbf2 = vector.array({\n", - " \"pt\": np.random.rand(5),\n", - " \"phi\": np.random.rand(5),\n", - " \"eta\": np.random.rand(5),\n", - " \"M\": np.random.rand(5)\n", - "})\n", + "vbf2 = vector.array(\n", + " {\n", + " \"pt\": np.random.rand(5),\n", + " \"phi\": np.random.rand(5),\n", + " \"eta\": np.random.rand(5),\n", + " \"M\": np.random.rand(5),\n", + " }\n", + ")\n", "\n", "# Generating dummy data for VVJet\n", - "VVJet = vector.array({\n", - " \"pt\": np.random.rand(5),\n", - " \"phi\": np.random.rand(5),\n", - " \"eta\": np.random.rand(5),\n", - " \"M\": np.random.rand(5)\n", - "})\n", + "VVJet = vector.array(\n", + " {\n", + " \"pt\": np.random.rand(5),\n", + " \"phi\": np.random.rand(5),\n", + " \"eta\": np.random.rand(5),\n", + " \"M\": np.random.rand(5),\n", + " }\n", + ")\n", "\n", "# Generating dummy data for bbJet\n", - "bbJet = vector.array({\n", - " \"pt\": np.random.rand(5),\n", - " \"phi\": np.random.rand(5),\n", - " \"eta\": np.random.rand(5),\n", - " \"M\": np.random.rand(5)\n", - "})\n", + "bbJet = vector.array(\n", + " {\n", + " \"pt\": np.random.rand(5),\n", + " \"phi\": np.random.rand(5),\n", + " \"eta\": np.random.rand(5),\n", + " \"M\": np.random.rand(5),\n", + " }\n", + ")\n", "\n", "end_time = time.time()\n", "elapsed_time = end_time - start_time\n", @@ -99,7 +107,7 @@ "\n", "# Boost using boost_p4\n", "vbf1_boosted_p4 = vbf1.boostCM_of_p4(system_4momentum)\n", - "#vbf1_boosted_p4 = vbf1.boost_p4(-system_4momentum)\n", + "# vbf1_boosted_p4 = vbf1.boost_p4(-system_4momentum)\n", "\n", "# Boost using to_beta3\n", "vbf1_boosted_beta = vbf1.boost(-system_4momentum.to_beta3())\n", @@ -107,7 +115,7 @@ "# Compare the results\n", "difference = vbf1_boosted_p4 - vbf1_boosted_beta\n", "\n", - "print(f'difference: {difference}')\n" + "print(f\"difference: {difference}\")" ] }, { @@ -148,7 +156,7 @@ "particle_A_boosted_beta3 = particle_A.boost(system_beta3)\n", "particle_B_boosted_beta3 = particle_B.boost(system_beta3)\n", "\n", - "particle_A_boosted_p4, particle_B_boosted_p4, particle_A_boosted_beta3, particle_B_boosted_beta3\n" + 
"particle_A_boosted_p4, particle_B_boosted_p4, particle_A_boosted_beta3, particle_B_boosted_beta3" ] }, { @@ -186,6 +194,7 @@ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", + "\n", "def to_four_momentum(pt, phi, eta, m):\n", " px = pt * np.cos(phi)\n", " py = pt * np.sin(phi)\n", @@ -193,6 +202,7 @@ " E = np.sqrt(m**2 + px**2 + py**2 + pz**2)\n", " return E, px, py, pz\n", "\n", + "\n", "def lorentz_transform(p, gamma, beta):\n", " E, px, py, pz = p\n", " bp = beta[0] * px + beta[1] * py + beta[2] * pz\n", @@ -203,10 +213,12 @@ " pz_prime = pz + beta[2] * (factor + E / (gamma + 1))\n", " return E_prime, px_prime, py_prime, pz_prime\n", "\n", + "\n", "def boost_particles(particles, beta_vector):\n", " gamma = 1.0 / np.sqrt(1.0 - np.sum(beta_vector**2))\n", " return np.array([lorentz_transform(p, gamma, beta_vector) for p in particles])\n", "\n", + "\n", "# Particle data\n", "massive_pt = np.array([200, 250])\n", "massive_phi = np.array([0, 0])\n", @@ -219,10 +231,22 @@ "smaller_m = np.array([10, 10, 10, 10])\n", "\n", "# Manual boost\n", - "massive_particles = np.array([to_four_momentum(pt, phi, eta, m) for pt, phi, eta, m in zip(massive_pt, massive_phi, massive_eta, massive_m)])\n", - "smaller_particles = np.array([to_four_momentum(pt, phi, eta, m) for pt, phi, eta, m in zip(smaller_pt, smaller_phi, smaller_eta, smaller_m)])\n", + "massive_particles = np.array(\n", + " [\n", + " to_four_momentum(pt, phi, eta, m)\n", + " for pt, phi, eta, m in zip(massive_pt, massive_phi, massive_eta, massive_m)\n", + " ]\n", + ")\n", + "smaller_particles = np.array(\n", + " [\n", + " to_four_momentum(pt, phi, eta, m)\n", + " for pt, phi, eta, m in zip(smaller_pt, smaller_phi, smaller_eta, smaller_m)\n", + " ]\n", + ")\n", "all_particles = np.concatenate([massive_particles, smaller_particles])\n", - "boost_vector = -(massive_particles[0] + massive_particles[1])[:3] / massive_particles[0][0] # px, py, pz divided by energy\n", + "boost_vector = (\n", + " -(massive_particles[0] + massive_particles[1])[:3] / massive_particles[0][0]\n", + ") # px, py, pz divided by energy\n", "boosted_particles_manual = boost_particles(all_particles, boost_vector)\n", "\n", "# Vector library boost (pseudo code for reference)\n", @@ -242,8 +266,7 @@ "boosted_particles_vector = all_particles.boost(boost_vector)\n", "\"\"\"\n", "\n", - "# Visualization code would follow here, comparing `boosted_particles_manual` with `boosted_particles_vector`\n", - "\n" + "# Visualization code would follow here, comparing `boosted_particles_manual` with `boosted_particles_vector`" ] }, { @@ -290,6 +313,7 @@ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", + "\n", "def to_four_momentum(pt, phi, eta, m):\n", " px = pt * np.cos(phi)\n", " py = pt * np.sin(phi)\n", @@ -297,28 +321,47 @@ " E = np.sqrt(m**2 + px**2 + py**2 + pz**2)\n", " return E, px, py, pz\n", "\n", + "\n", "def lorentz_transform(p, boost_vector):\n", " gamma = 1 / np.sqrt(1 - np.dot(boost_vector, boost_vector))\n", " E, px, py, pz = p\n", " E_prime = gamma * (E - np.dot(p[1:], boost_vector))\n", - " spatial_part = np.array(p[1:]) + (gamma - 1) * np.dot(p[1:], boost_vector) / np.dot(boost_vector, boost_vector) * boost_vector - gamma * E * boost_vector\n", + " spatial_part = (\n", + " np.array(p[1:])\n", + " + (gamma - 1)\n", + " * np.dot(p[1:], boost_vector)\n", + " / np.dot(boost_vector, boost_vector)\n", + " * boost_vector\n", + " - gamma * E * boost_vector\n", + " )\n", " return E_prime, *spatial_part\n", "\n", + "\n", "def get_eta(px, pz):\n", 
" pt = np.sqrt(px**2)\n", - " theta = np.arccos(pz/np.sqrt(px**2 + pz**2))\n", - " eta = -np.log(np.tan(theta/2))\n", + " theta = np.arccos(pz / np.sqrt(px**2 + pz**2))\n", + " eta = -np.log(np.tan(theta / 2))\n", " return eta\n", "\n", + "\n", "def plot_particles(particles, title=\"\"):\n", - " colors = ['b', 'g', 'r', 'c', 'm', 'y']\n", + " colors = [\"b\", \"g\", \"r\", \"c\", \"m\", \"y\"]\n", " fig, ax = plt.subplots(figsize=(8, 8))\n", " for idx, particle in enumerate(particles):\n", - " ax.quiver(0, 0, particle[3], particle[1], angles='xy', scale_units='xy', scale=5, color=colors[idx])\n", + " ax.quiver(\n", + " 0,\n", + " 0,\n", + " particle[3],\n", + " particle[1],\n", + " angles=\"xy\",\n", + " scale_units=\"xy\",\n", + " scale=5,\n", + " color=colors[idx],\n", + " )\n", " ax.set_xlim(-100, 100)\n", " ax.set_ylim(-100, 100)\n", - " ax.axhline(0, color='black',linewidth=0.5)\n", - " ax.axvline(0, color='black',linewidth=0.5)\n", + " ax.axhline(0, color=\"black\", linewidth=0.5)\n", + " ax.axvline(0, color=\"black\", linewidth=0.5)\n", " ax.set_ylabel(\"x (transverse) [m]\")\n", " ax.set_xlabel(\"z (along beam) [m]\")\n", " ax.set_title(title)\n", @@ -326,15 +369,24 @@ " # Adding labels\n", " labels = []\n", " for idx, particle in enumerate(particles):\n", - " pt = np.sqrt(particle[1]**2)\n", + " pt = np.sqrt(particle[1] ** 2)\n", " eta = get_eta(particle[1], particle[3])\n", - " mass = np.sqrt(particle[0]**2 - (particle[1]**2 + particle[2]**2 + particle[3]**2))\n", + " mass = np.sqrt(particle[0] ** 2 - (particle[1] ** 2 + particle[2] ** 2 + particle[3] ** 2))\n", " label = f\"Particle {idx+1} (color {colors[idx]}): pt: {pt:.2f} GeV, eta: {eta:.2f}, m: {mass:.2f} GeV\"\n", " labels.append(label)\n", - " props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)\n", - " ax.text(0.05, 0.95, '\\n'.join(labels), transform=ax.transAxes, fontsize=8, verticalalignment='top', bbox=props)\n", + " props = dict(boxstyle=\"round\", facecolor=\"wheat\", alpha=0.5)\n", + " ax.text(\n", + " 0.05,\n", + " 0.95,\n", + " \"\\n\".join(labels),\n", + " transform=ax.transAxes,\n", + " fontsize=8,\n", + " verticalalignment=\"top\",\n", + " bbox=props,\n", + " )\n", " plt.show()\n", "\n", + "\n", "# Define particles\n", "particles = [\n", " to_four_momentum(200, 0, 1, 100),\n", @@ -342,7 +394,7 @@ " to_four_momentum(30, 0, 1.5, 10),\n", " to_four_momentum(40, 0, -1.3, 10),\n", " to_four_momentum(20, 0, 2, 10),\n", - " to_four_momentum(25, 0, -2.2, 10)\n", + " to_four_momentum(25, 0, -2.2, 10),\n", "]\n", "\n", "# Print momentum sum before boost\n", @@ -358,7 +410,7 @@ "total_py = np.sum([p[2] for p in particles])\n", "total_pz = np.sum([p[3] for p in particles])\n", "\n", - "boost_vector = np.array([total_px/total_E, total_py/total_E, total_pz/total_E])\n", + "boost_vector = np.array([total_px / total_E, total_py / total_E, total_pz / total_E])\n", "\n", "# Apply boost to particles\n", "boosted_particles = [lorentz_transform(p, boost_vector) for p in particles]\n", @@ -422,27 +474,46 @@ "import matplotlib.pyplot as plt\n", "import vector\n", "\n", + "\n", "def plot_particles(particles, title=\"\"):\n", - " colors = ['b', 'g', 'r', 'c', 'm', 'y']\n", + " colors = [\"b\", \"g\", \"r\", \"c\", \"m\", \"y\"]\n", " fig, ax = plt.subplots(figsize=(8, 8))\n", " for idx, particle in enumerate(particles):\n", - " ax.quiver(0, 0, particle.pz, particle.px, angles='xy', scale_units='xy', scale=5, color=colors[idx])\n", + " ax.quiver(\n", + " 0,\n", + " 0,\n", + " particle.pz,\n", + " particle.px,\n", + " 
angles=\"xy\",\n", + " scale_units=\"xy\",\n", + " scale=5,\n", + " color=colors[idx],\n", + " )\n", " ax.set_xlim(-100, 100)\n", " ax.set_ylim(-100, 100)\n", - " ax.axhline(0, color='black',linewidth=0.5)\n", - " ax.axvline(0, color='black',linewidth=0.5)\n", + " ax.axhline(0, color=\"black\", linewidth=0.5)\n", + " ax.axvline(0, color=\"black\", linewidth=0.5)\n", " ax.set_xlabel(\"x (transverse) [m]\")\n", " ax.set_ylabel(\"z (along beam) [m]\")\n", " ax.set_title(title)\n", - " \n", + "\n", " # Adding colored labels\n", " for idx, particle in enumerate(particles):\n", " label = f\"Particle {idx+1}: pt: {particle.pt:.2f} GeV, eta: {particle.eta:.2f}, m: {particle.M:.2f} GeV\"\n", - " props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)\n", - " ax.text(0.05, 0.95 - 0.05 * idx, label, transform=ax.transAxes, fontsize=8, verticalalignment='top', bbox=props, color=colors[idx])\n", + " props = dict(boxstyle=\"round\", facecolor=\"wheat\", alpha=0.5)\n", + " ax.text(\n", + " 0.05,\n", + " 0.95 - 0.05 * idx,\n", + " label,\n", + " transform=ax.transAxes,\n", + " fontsize=8,\n", + " verticalalignment=\"top\",\n", + " bbox=props,\n", + " color=colors[idx],\n", + " )\n", "\n", " plt.show()\n", - " \n", + "\n", "\n", "# Define particles using pt, eta, phi, and m\n", "particles = [\n", @@ -451,7 +522,7 @@ " vector.obj(pt=30, eta=1.5, phi=0, M=10),\n", " vector.obj(pt=40, eta=-1.3, phi=0, M=10),\n", " vector.obj(pt=20, eta=2, phi=0, M=10),\n", - " vector.obj(pt=25, eta=-2.2, phi=0, M=10)\n", + " vector.obj(pt=25, eta=-2.2, phi=0, M=10),\n", "]\n", "\n", "# Plot original particles\n", @@ -471,7 +542,7 @@ "\n", "print(system_4vec)\n", "print(particles)\n", - "print(boosted_particles[5].phi)\n" + "print(boosted_particles[5].phi)" ] }, { @@ -499,6 +570,7 @@ "source": [ "import numpy as np\n", "\n", + "\n", "def to_four_momentum(pt, phi, eta, m):\n", " px = pt * np.cos(phi)\n", " py = pt * np.sin(phi)\n", @@ -506,15 +578,24 @@ " E = np.sqrt(m**2 + px**2 + py**2 + pz**2)\n", " return E, px, py, pz\n", "\n", + "\n", "def lorentz_transform(p, boost_vector):\n", " gamma = 1 / np.sqrt(1 - np.dot(boost_vector, boost_vector))\n", - " print('doting',boost_vector,np.dot(boost_vector, boost_vector))\n", + " print(\"doting\", boost_vector, np.dot(boost_vector, boost_vector))\n", " E, px, py, pz = p\n", " E_prime = gamma * (E - np.dot(p[1:], boost_vector))\n", - " spatial_part = np.array(p[1:]) + (gamma - 1) * np.dot(p[1:], boost_vector) / np.dot(boost_vector, boost_vector) * boost_vector - gamma * E * boost_vector\n", + " spatial_part = (\n", + " np.array(p[1:])\n", + " + (gamma - 1)\n", + " * np.dot(p[1:], boost_vector)\n", + " / np.dot(boost_vector, boost_vector)\n", + " * boost_vector\n", + " - gamma * E * boost_vector\n", + " )\n", " print(spatial_part)\n", " return E_prime, *spatial_part\n", "\n", + "\n", "def from_four_momentum(E, px, py, pz):\n", " pt = np.sqrt(px**2 + py**2)\n", " phi = np.arctan2(py, px)\n", @@ -523,40 +604,42 @@ " m = np.sqrt(E**2 - (px**2 + py**2 + pz**2))\n", " return pt, phi, eta, m\n", "\n", + "\n", "def boost_particle_to_system_frame(particles, other_particle):\n", " # Convert given particles to four-momentum\n", " particles_4momentum = [to_four_momentum(*particle) for particle in particles]\n", - " \n", + "\n", " # Compute the total 4-momentum of the system\n", " total_E = sum(p[0] for p in particles_4momentum)\n", " total_px = sum(p[1] for p in particles_4momentum)\n", " total_py = sum(p[2] for p in particles_4momentum)\n", " total_pz = sum(p[3] for p in 
particles_4momentum)\n", - " boost_vector = np.array([total_px/total_E, total_py/total_E, total_pz/total_E])\n", + " boost_vector = np.array([total_px / total_E, total_py / total_E, total_pz / total_E])\n", "\n", " # Convert other particle to four-momentum\n", " other_particle_4momentum = to_four_momentum(*other_particle)\n", - " \n", + "\n", " # Boost the other particle\n", " boosted_other_particle_4momentum = lorentz_transform(other_particle_4momentum, boost_vector)\n", - " \n", + "\n", " # Convert the boosted particle back to (pt, phi, eta, m) coordinates\n", " return from_four_momentum(*boosted_other_particle_4momentum)\n", "\n", + "\n", "# Define particles\n", "particles = [\n", " (200, 0, 1, 100),\n", " (250, 0, -1.1, 100),\n", " (30, 0, 1.5, 10),\n", - " (40, 0, -1.3, 10),#(20, 0, 2, 10),(25, 0, -2.2, 10)\n", + " (40, 0, -1.3, 10), # (20, 0, 2, 10),(25, 0, -2.2, 10)\n", "]\n", "\n", "# Define other particle\n", - "other_particle = (200, 0, 1, 100) #(25, 0, -2.2, 10)\n", + "other_particle = (200, 0, 1, 100) # (25, 0, -2.2, 10)\n", "\n", "# Boost other particle to the frame of the particle system\n", "boosted_other_particle = boost_particle_to_system_frame(particles, other_particle)\n", - "print(boosted_other_particle)\n" + "print(boosted_other_particle)" ] }, { @@ -580,24 +663,38 @@ "import numpy as np\n", "import time\n", "\n", + "\n", "def to_four_momentum(pt, phi, eta, m):\n", - " px = pt * np.cos(phi)\n", - " py = pt * np.sin(phi)\n", - " pz = pt * np.sinh(eta)\n", - " E = np.sqrt(m**2 + px**2 + py**2 + pz**2)\n", - " return E, px, py, pz\n", - " \n", + " px = pt * np.cos(phi)\n", + " py = pt * np.sin(phi)\n", + " pz = pt * np.sinh(eta)\n", + " E = np.sqrt(m**2 + px**2 + py**2 + pz**2)\n", + " return E, px, py, pz\n", + "\n", + "\n", "def compute_spatial_part(E, px, py, pz, boost_vector, boost_vector_dot_product):\n", " gamma = 1.0 / np.sqrt(1.0 - boost_vector_dot_product)\n", - " \n", + "\n", " # Dot product of momentum and boost vector\n", " p_dot_v = px * boost_vector[0] + py * boost_vector[1] + pz * boost_vector[2]\n", - " \n", + "\n", " # Lorentz transformation for the spatial components\n", - " px_prime = px + (gamma - 1) * p_dot_v * boost_vector[0] / boost_vector_dot_product - gamma * E * boost_vector[0]\n", - " py_prime = py + (gamma - 1) * p_dot_v * boost_vector[1] / boost_vector_dot_product - gamma * E * boost_vector[1]\n", - " pz_prime = pz + (gamma - 1) * p_dot_v * boost_vector[2] / boost_vector_dot_product - gamma * E * boost_vector[2]\n", - " \n", + " px_prime = (\n", + " px\n", + " + (gamma - 1) * p_dot_v * boost_vector[0] / boost_vector_dot_product\n", + " - gamma * E * boost_vector[0]\n", + " )\n", + " py_prime = (\n", + " py\n", + " + (gamma - 1) * p_dot_v * boost_vector[1] / boost_vector_dot_product\n", + " - gamma * E * boost_vector[1]\n", + " )\n", + " pz_prime = (\n", + " pz\n", + " + (gamma - 1) * p_dot_v * boost_vector[2] / boost_vector_dot_product\n", + " - gamma * E * boost_vector[2]\n", + " )\n", + "\n", " return px_prime, py_prime, pz_prime\n", "\n", "\n", @@ -619,11 +716,17 @@ "total_px = pxVV + pxbb + px1 + px2\n", "total_py = pyVV + pybb + py1 + py2\n", "total_pz = pzVV + pzbb + pz1 + pz2\n", - "CM_boost_vector = np.array([total_px/total_E, total_py/total_E, total_pz/total_E])\n", - "boost_vector_dot_product = CM_boost_vector[0]**2 + CM_boost_vector[1]**2 + CM_boost_vector[2]**2\n", - "\n", - "px1_boosted, py1_boosted, pz1_boosted = compute_spatial_part(E1, px1, py1, pz1, CM_boost_vector, boost_vector_dot_product)\n", - "px2_boosted, py2_boosted, 
pz2_boosted = compute_spatial_part(E2, px2, py2, pz2, CM_boost_vector, boost_vector_dot_product)\n", + "CM_boost_vector = np.array([total_px / total_E, total_py / total_E, total_pz / total_E])\n", + "boost_vector_dot_product = (\n", + " CM_boost_vector[0] ** 2 + CM_boost_vector[1] ** 2 + CM_boost_vector[2] ** 2\n", + ")\n", + "\n", + "px1_boosted, py1_boosted, pz1_boosted = compute_spatial_part(\n", + " E1, px1, py1, pz1, CM_boost_vector, boost_vector_dot_product\n", + ")\n", + "px2_boosted, py2_boosted, pz2_boosted = compute_spatial_part(\n", + " E2, px2, py2, pz2, CM_boost_vector, boost_vector_dot_product\n", + ")\n", "\n", "theta1 = np.arccos(pz1_boosted / np.sqrt(px1_boosted**2 + py1_boosted**2 + pz1_boosted**2))\n", "theta2 = np.arccos(pz2_boosted / np.sqrt(px2_boosted**2 + py2_boosted**2 + pz2_boosted**2))\n", @@ -632,7 +735,7 @@ "print(CM_boost_vector)\n", "print(px1_boosted, py1_boosted, pz1_boosted)\n", "print(\"Cos(Theta) for vbf1:\", np.cos(theta1))\n", - "print(\"Cos(Theta) for vbf2:\", np.cos(theta2))\n" + "print(\"Cos(Theta) for vbf2:\", np.cos(theta2))" ] }, { @@ -658,6 +761,7 @@ "def to_vector(pt, eta, phi, mass):\n", " return vector.obj(pt=pt, eta=eta, phi=phi, mass=mass)\n", "\n", + "\n", "# Generate example data for a single event\n", "vbf1 = to_vector(200, 1, 0, 100)\n", "vbf2 = to_vector(250, -1.1, 0, 100)\n", @@ -665,7 +769,7 @@ "bbJet = to_vector(40, -1.3, 0, 10)\n", "\n", "# Create a combined 4-momentum for the system\n", - "system_4vec = vbf1 + vbf2 + VVJet + bbJet \n", + "system_4vec = vbf1 + vbf2 + VVJet + bbJet\n", "\n", "# Boost the vectors into the center-of-mass frame of the system\n", "j1_CMF = vbf1.boostCM_of_p4(system_4vec)\n", @@ -677,7 +781,7 @@ "\n", "print(\"Method 2 results:\")\n", "print(\"Cos(Theta) for vbf1:\", np.cos(thetab1))\n", - "print(\"Cos(Theta) for vbf2:\", np.cos(thetab2))\n" + "print(\"Cos(Theta) for vbf2:\", np.cos(thetab2))" ] } ], diff --git a/src/HHbbVV/VBF_binder/VBFgenInfoTests.ipynb b/src/HHbbVV/VBF_binder/VBFgenInfoTests.ipynb index 90cb3778..1bf65432 100644 --- a/src/HHbbVV/VBF_binder/VBFgenInfoTests.ipynb +++ b/src/HHbbVV/VBF_binder/VBFgenInfoTests.ipynb @@ -428,11 +428,10 @@ } ], "source": [ - "df = pd.read_parquet('0-2.parquet')\n", + "df = pd.read_parquet(\"0-2.parquet\")\n", "\n", "df.columns.values.tolist()\n", - "df\n", - " " + "df" ] }, { @@ -468,27 +467,31 @@ "source": [ "# Apply lepton veto selections ($N_\\mu =0$ and $N_e=0$)\n", "print(np.shape(df))\n", - "df_em = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0)]\n", + "df_em = df[(df[(\"nGoodMuons\", 0)] == 0) & (df[(\"nGoodElectrons\", 0)] == 0)]\n", "\n", "# 2 vbf jets\n", - "#df_vbf = df[ (df[('nGoodVBFJetsUnsorted', 0)] >= 2)]\n", + "# df_vbf = df[ (df[('nGoodVBFJetsUnsorted', 0)] >= 2)]\n", "\n", "# lepton veto and 2 vbf jets\n", - "#df_sorted_Rand = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJetsUnsorted', 0)] >= 2)& (df[('nGoodJets', 0)] == 0)]\n", - "df_sorted_pt = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJetsSortedpt', 0)] >= 2)& (df[('nGoodJets', 0)] == 0)]\n", - "#df_sorted_M = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJetsSortedM', 0)] >= 2)& (df[('nGoodJets', 0)] == 0)]\n", - "#df_sorted_eta = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJetsSortedeta', 0)] >= 2)& (df[('nGoodJets', 0)] == 0)]\n", - "\n", + "# df_sorted_Rand = df[(df[('nGoodMuons', 0)] == 0) & 
(df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJetsUnsorted', 0)] >= 2)& (df[('nGoodJets', 0)] == 0)]\n", + "df_sorted_pt = df[\n", + " (df[(\"nGoodMuons\", 0)] == 0)\n", + " & (df[(\"nGoodElectrons\", 0)] == 0)\n", + " & (df[(\"nGoodVBFJetsSortedpt\", 0)] >= 2)\n", + " & (df[(\"nGoodJets\", 0)] == 0)\n", + "]\n", + "# df_sorted_M = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJetsSortedM', 0)] >= 2)& (df[('nGoodJets', 0)] == 0)]\n", + "# df_sorted_eta = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJetsSortedeta', 0)] >= 2)& (df[('nGoodJets', 0)] == 0)]\n", "\n", "\n", "# generate all variables that are needed for the ak8 jet selections in VBF HH4b paper.\n", - "#df[('DijetDeltaPhi', 0)] = np.abs(df[('ak8FatJetPhi', 0)] - df[('ak8FatJetPhi', 1)])\n", - "#df[('DijetDeltaEta', 0)] = np.abs(df[('ak8FatJetEta', 0)] - df[('ak8FatJetEta', 1)])\n", - "#print(np.shape(df)[0],np.shape(df_sorted_Rand)[0],np.shape(df_sorted_pt)[0],np.shape(df_sorted_M)[0],np.shape(df_sorted_eta)[0])\n", - "print('thing',np.sum(df['weight']),np.sum(df['weight_noxsec']))\n", - "print('tsts',np.sum(df_sorted_pt['weight']),np.sum(df_sorted_pt['weight_noxsec']))\n", + "# df[('DijetDeltaPhi', 0)] = np.abs(df[('ak8FatJetPhi', 0)] - df[('ak8FatJetPhi', 1)])\n", + "# df[('DijetDeltaEta', 0)] = np.abs(df[('ak8FatJetEta', 0)] - df[('ak8FatJetEta', 1)])\n", + "# print(np.shape(df)[0],np.shape(df_sorted_Rand)[0],np.shape(df_sorted_pt)[0],np.shape(df_sorted_M)[0],np.shape(df_sorted_eta)[0])\n", + "print(\"thing\", np.sum(df[\"weight\"]), np.sum(df[\"weight_noxsec\"]))\n", + "print(\"tsts\", np.sum(df_sorted_pt[\"weight\"]), np.sum(df_sorted_pt[\"weight_noxsec\"]))\n", "df_sorted_pt\n", - "print(np.sum(df_sorted_pt['weight_noxsec']))\n" + "print(np.sum(df_sorted_pt[\"weight_noxsec\"]))" ] }, { @@ -643,16 +646,35 @@ " dphi = np.where(dphi < np.pi, dphi, 2 * np.pi - dphi)\n", " return np.sqrt(deta**2 + dphi**2)\n", "\n", + "\n", "# Calculate delta R for each sorting method and add it to the DataFrame\n", - "for df, method in zip([df_sorted_Rand, df_sorted_pt, df_sorted_M, df_sorted_eta], ['Rand', 'pt', 'M', 'eta']):\n", - " df[\"deltaR_1_to_0\"] = deltaR(df[('vbfetaGen', 0)], df[('vbfphiGen', 0)],\n", - " df[('vbfetaSorted'+method, 0)], df[('vbfphiSorted'+method, 0)])\n", - " df[\"deltaR_1_to_1\"] = deltaR(df[('vbfetaGen', 1)], df[('vbfphiGen', 1)],\n", - " df[('vbfetaSorted'+method, 0)], df[('vbfphiSorted'+method, 0)])\n", - " df[\"deltaR_2_to_0\"] = deltaR(df[('vbfetaGen', 0)], df[('vbfphiGen', 0)],\n", - " df[('vbfetaSorted'+method, 1)], df[('vbfphiSorted'+method, 1)])\n", - " df[\"deltaR_2_to_1\"] = deltaR(df[('vbfetaGen', 1)], df[('vbfphiGen', 1)],\n", - " df[('vbfetaSorted'+method, 1)], df[('vbfphiSorted'+method, 1)])\n", + "for df, method in zip(\n", + " [df_sorted_Rand, df_sorted_pt, df_sorted_M, df_sorted_eta], [\"Rand\", \"pt\", \"M\", \"eta\"]\n", + "):\n", + " df[\"deltaR_1_to_0\"] = deltaR(\n", + " df[(\"vbfetaGen\", 0)],\n", + " df[(\"vbfphiGen\", 0)],\n", + " df[(\"vbfetaSorted\" + method, 0)],\n", + " df[(\"vbfphiSorted\" + method, 0)],\n", + " )\n", + " df[\"deltaR_1_to_1\"] = deltaR(\n", + " df[(\"vbfetaGen\", 1)],\n", + " df[(\"vbfphiGen\", 1)],\n", + " df[(\"vbfetaSorted\" + method, 0)],\n", + " df[(\"vbfphiSorted\" + method, 0)],\n", + " )\n", + " df[\"deltaR_2_to_0\"] = deltaR(\n", + " df[(\"vbfetaGen\", 0)],\n", + " df[(\"vbfphiGen\", 0)],\n", + " df[(\"vbfetaSorted\" + method, 1)],\n", + " df[(\"vbfphiSorted\" + method, 1)],\n", + " 
)\n", + " df[\"deltaR_2_to_1\"] = deltaR(\n", + " df[(\"vbfetaGen\", 1)],\n", + " df[(\"vbfphiGen\", 1)],\n", + " df[(\"vbfetaSorted\" + method, 1)],\n", + " df[(\"vbfphiSorted\" + method, 1)],\n", + " )\n", "\n", "# Define a function to create the required table\n", "def display_table(df, method):\n", @@ -661,14 +683,14 @@ " table_string += f\"Number of events: {len(df)}\\n\"\n", " table_string += \"| Index | Jet | dR to gen quark 1 | dR to gen quark 2 |\\n\"\n", " table_string += \"| --- | --- | --- | --- |\\n\"\n", - " \n", + "\n", " # Add each row to the table string\n", " for index in df.index[:10]:\n", " table_string += f\"| {index} | Jet 1 | {df['deltaR_1_to_0'][index]:.2f} | {df['deltaR_1_to_1'][index]:.2f} |\\n\"\n", " table_string += f\"| {index} | Jet 2 | {df['deltaR_2_to_0'][index]:.2f} | {df['deltaR_2_to_1'][index]:.2f} |\\n\"\n", "\n", " # Calculate the summary information\n", - " dR_cols = ['deltaR_1_to_0', 'deltaR_1_to_1', 'deltaR_2_to_0', 'deltaR_2_to_1']\n", + " dR_cols = [\"deltaR_1_to_0\", \"deltaR_1_to_1\", \"deltaR_2_to_0\", \"deltaR_2_to_1\"]\n", " counts = (df[dR_cols] < 0.4).sum(axis=1).value_counts()\n", "\n", " # Add the summary information to the table string\n", @@ -679,14 +701,12 @@ " # Display the table string\n", " print(table_string)\n", "\n", - "# Display tables for each DataFrame\n", - "display_table(df_sorted_Rand, 'Rand')\n", - "display_table(df_sorted_pt, 'pt')\n", - "display_table(df_sorted_M, 'M')\n", - "display_table(df_sorted_eta, 'eta')\n", "\n", - "\n", - "\n" + "# Display tables for each DataFrame\n", + "display_table(df_sorted_Rand, \"Rand\")\n", + "display_table(df_sorted_pt, \"pt\")\n", + "display_table(df_sorted_M, \"M\")\n", + "display_table(df_sorted_eta, \"eta\")" ] }, { @@ -1302,7 +1322,7 @@ "WARNING:root:[213, 976, 395, 1584, 1.9, 1.9]\n", "\"\"\"\n", "\n", - "log_string_filtered_full2 = '''\n", + "log_string_filtered_full2 = \"\"\"\n", "WARNING:root:[247, 981, 371, 1599, 0.6, 0.6]\n", "WARNING:root:[247, 977, 376, 1600, 0.6, 0.7]\n", "WARNING:root:[246, 976, 377, 1599, 0.6, 0.8]\n", @@ -1697,7 +1717,7 @@ "WARNING:root:[277, 995, 396, 1668, 1.9, 1.7]\n", "WARNING:root:[277, 993, 395, 1665, 1.9, 1.8]\n", "WARNING:root:[278, 995, 395, 1668, 1.9, 1.9]\n", - "'''" + "\"\"\"" ] }, { @@ -1707,7 +1727,7 @@ "metadata": {}, "outputs": [], "source": [ - "mega_log_string_filtered = '''\n", + "mega_log_string_filtered = \"\"\"\n", "WARNING:root:[269, 938, 385, 1592, 0.6, 0.6]\n", "WARNING:root:[270, 938, 387, 1595, 0.6, 0.7]\n", "WARNING:root:[270, 938, 388, 1596, 0.6, 0.8]\n", @@ -2688,7 +2708,7 @@ "WARNING:root:[214, 979, 398, 1591, 1.9, 1.7]\n", "WARNING:root:[214, 975, 397, 1586, 1.9, 1.8]\n", "WARNING:root:[213, 976, 395, 1584, 1.9, 1.9]\n", - "'''" + "\"\"\"" ] }, { @@ -2744,9 +2764,10 @@ " # Extract the list and convert it to numbers\n", " values = list(map(float, match.group(1).split(\", \")))\n", " # Extract count[2], R1, and R2, and calculate count[2]/9600\n", - " data.append([(values[1]+values[2])/9600, values[4], values[5]])\n", + " data.append([(values[1] + values[2]) / 9600, values[4], values[5]])\n", " return np.array(data)\n", "\n", + "\n", "def parse_mega_log_string(mega_log_string):\n", " # Split the mega log string into individual logs\n", " log_strings = mega_log_string.strip().split(\"\\n\\n\")\n", @@ -2760,7 +2781,7 @@ " # Extract the list and convert it to numbers\n", " values = list(map(float, match.group(1).split(\", \")))\n", " # Extract count[2], R1, and R2, and calculate count[2]/9600\n", - " r1, r2, count = values[4], 
values[5], (values[1]+values[2])/9600\n", + " r1, r2, count = values[4], values[5], (values[1] + values[2]) / 9600\n", " # Store the values in the data dictionary\n", " if (r1, r2) in data:\n", " data[(r1, r2)].append(count)\n", @@ -2772,8 +2793,9 @@ " averaged_data.append([sum(data[key]) / len(data[key]), key[0], key[1]])\n", " return np.array(averaged_data)\n", "\n", + "\n", "def parse_log_file(file_path):\n", - " with open(file_path, 'r') as file:\n", + " with open(file_path, \"r\") as file:\n", " lines = file.readlines()\n", "\n", " data = []\n", @@ -2789,55 +2811,60 @@ "\n", " return np.array(data)\n", "\n", - "def plot_heatmap(data,title = 'Heatmap of percent unfiltered events with 2 matched vbf jets'):\n", + "\n", + "def plot_heatmap(data, title=\"Heatmap of percent unfiltered events with 2 matched vbf jets\"):\n", " # Create a 2D array for the heatmap\n", - " heatmap_data = np.zeros((len(np.unique(data[:,1])), len(np.unique(data[:,2]))))\n", + " heatmap_data = np.zeros((len(np.unique(data[:, 1])), len(np.unique(data[:, 2]))))\n", " for row in data:\n", " # Map the values of R1 and R2 to indices in the heatmap array\n", - " r1_idx = np.where(np.unique(data[:,1]) == row[1])[0][0]\n", - " r2_idx = np.where(np.unique(data[:,2]) == row[2])[0][0]\n", + " r1_idx = np.where(np.unique(data[:, 1]) == row[1])[0][0]\n", + " r2_idx = np.where(np.unique(data[:, 2]) == row[2])[0][0]\n", " # Set the heatmap value to count[2]/9600\n", " heatmap_data[r1_idx, r2_idx] = row[0]\n", - " \n", + "\n", " # Define grid\n", - " x = np.unique(data[:,2])\n", - " y = np.unique(data[:,1])\n", + " x = np.unique(data[:, 2])\n", + " y = np.unique(data[:, 1])\n", " # Adjust the coordinates to represent the corners of the cells\n", " x_grid = np.concatenate([x - 0.05, [x[-1] + 0.05]])\n", " y_grid = np.concatenate([y - 0.05, [y[-1] + 0.05]])\n", "\n", " # Create the heatmap\n", " plt.figure(figsize=(8, 6))\n", - " plt.pcolormesh(x_grid, y_grid, heatmap_data, cmap='viridis', shading='auto')\n", - " plt.colorbar(label='# events w 1 match + 2 matched vbf jets /num events passing ak8 selection')\n", - " plt.xlabel('R1')\n", - " plt.ylabel('R2')\n", + " plt.pcolormesh(x_grid, y_grid, heatmap_data, cmap=\"viridis\", shading=\"auto\")\n", + " plt.colorbar(label=\"# events w 1 match + 2 matched vbf jets /num events passing ak8 selection\")\n", + " plt.xlabel(\"R1\")\n", + " plt.ylabel(\"R2\")\n", " plt.title(title)\n", " plt.show()\n", "\n", "\n", - "\n", - "\n", - "#data = parse_log_string(log_string)\n", - "#plot_heatmap(data,title= 'Heatmap of percent unfiltered events with 1 or 2 matched vbf jets')\n", - "#data = parse_log_string(log_string_filtered)\n", - "#plot_heatmap(data,title= 'Heatmap of percent vbf selection (no dijet formed) filtered events with 1 or 2 matched vbf jets')\n", + "# data = parse_log_string(log_string)\n", + "# plot_heatmap(data,title= 'Heatmap of percent unfiltered events with 1 or 2 matched vbf jets')\n", + "# data = parse_log_string(log_string_filtered)\n", + "# plot_heatmap(data,title= 'Heatmap of percent vbf selection (no dijet formed) filtered events with 1 or 2 matched vbf jets')\n", "data = parse_log_string(log_string_filtered_full)\n", - "plot_heatmap(data,title= 'Heatmap of percent events with 1 or 2 matched vbf jets from vbf selected events')\n", - "#data = parse_log_string(log_string_filtered_full2)\n", - "#plot_heatmap(data,title= 'Heatmap of percent events with 1 or 2 matched vbf jets from vbf selected events')\n", + "plot_heatmap(\n", + " data, title=\"Heatmap of percent events with 1 
or 2 matched vbf jets from vbf selected events\"\n", + ")\n", + "# data = parse_log_string(log_string_filtered_full2)\n", + "# plot_heatmap(data,title= 'Heatmap of percent events with 1 or 2 matched vbf jets from vbf selected events')\n", "\n", "# data = parse_mega_log_string(mega_log_string_filtered)\n", "# plot_heatmap(data,title= 'Heatmap of percent events with 1 or 2 matched vbf jets from vbf selected events averaged over several samples')\n", "\n", - "data = parse_log_file('heatmap_selection_success_with_correct_Higgs.txt')\n", - "plot_heatmap(data,title= 'Heatmap of percent events with 1 or 2 matched vbf jets from vbf selected events averaged over several samples with identified Higgs')\n", - "\n", + "data = parse_log_file(\"heatmap_selection_success_with_correct_Higgs.txt\")\n", + "plot_heatmap(\n", + " data,\n", + " title=\"Heatmap of percent events with 1 or 2 matched vbf jets from vbf selected events averaged over several samples with identified Higgs\",\n", + ")\n", "\n", - "data = parse_log_file('heatmap_fatjet_exclusion_ak8selections_normalized_selected.txt')\n", - "plot_heatmap(data,title= 'Heatmap of percent unfiltered events with 1 or 2 matched vbf jets (normalized to events w 2 passing ak8 jets)')\n", "\n", - "\n" + "data = parse_log_file(\"heatmap_fatjet_exclusion_ak8selections_normalized_selected.txt\")\n", + "plot_heatmap(\n", + " data,\n", + " title=\"Heatmap of percent unfiltered events with 1 or 2 matched vbf jets (normalized to events w 2 passing ak8 jets)\",\n", + ")" ] }, { @@ -2879,6 +2906,7 @@ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", + "\n", "def plot_data(data):\n", " # Extract the pt, R2, and match information\n", " pt = data[:, 0]\n", @@ -2887,32 +2915,45 @@ "\n", " # Define bins\n", " pt_bins = np.linspace(np.min(pt), np.max(pt), 50)\n", - " R2_bins = np.concatenate((np.array([0.55]), np.arange(0.65, np.max(R2)+0.1, 0.1)))\n", + " R2_bins = np.concatenate((np.array([0.55]), np.arange(0.65, np.max(R2) + 0.1, 0.1)))\n", "\n", " # Create 2D histograms\n", " plt.figure(figsize=(10, 7))\n", - " \n", + "\n", " # Unsuccessful matches\n", - " hist_unsuccessful, _, _, img = plt.hist2d(pt[~match], R2[~match], bins=[pt_bins, R2_bins], cmap='Reds', alpha=0.5, label='Unsuccessful match')\n", - " plt.colorbar(img, label='Count')\n", - " plt.xlabel('VVJet $p_T$')\n", - " plt.ylabel('R2')\n", - " plt.title('Unsuccessful Matches')\n", + " hist_unsuccessful, _, _, img = plt.hist2d(\n", + " pt[~match],\n", + " R2[~match],\n", + " bins=[pt_bins, R2_bins],\n", + " cmap=\"Reds\",\n", + " alpha=0.5,\n", + " label=\"Unsuccessful match\",\n", + " )\n", + " plt.colorbar(img, label=\"Count\")\n", + " plt.xlabel(\"VVJet $p_T$\")\n", + " plt.ylabel(\"R2\")\n", + " plt.title(\"Unsuccessful Matches\")\n", " plt.show()\n", "\n", " # Successful matches\n", " plt.figure(figsize=(10, 7))\n", - " hist_successful, _, _, img = plt.hist2d(pt[match], R2[match], bins=[pt_bins, R2_bins], cmap='Blues', alpha=0.5, label='Successful match')\n", - " plt.colorbar(img, label='Count')\n", - " plt.xlabel('VVJet $p_T$')\n", - " plt.ylabel('R2')\n", - " plt.title('Successful Matches')\n", + " hist_successful, _, _, img = plt.hist2d(\n", + " pt[match],\n", + " R2[match],\n", + " bins=[pt_bins, R2_bins],\n", + " cmap=\"Blues\",\n", + " alpha=0.5,\n", + " label=\"Successful match\",\n", + " )\n", + " plt.colorbar(img, label=\"Count\")\n", + " plt.xlabel(\"VVJet $p_T$\")\n", + " plt.ylabel(\"R2\")\n", + " plt.title(\"Successful Matches\")\n", " plt.show()\n", "\n", "\n", - "\n", "# Use 
the function\n", - "plot_data(data)\n" + "plot_data(data)" ] }, { @@ -2951,23 +2992,32 @@ "\n", " # Define bins\n", " pt_bins = np.linspace(np.min(pt), np.max(pt), 50)\n", - " R2_bins = np.concatenate((np.array([0.55]), np.arange(0.65, np.max(R2)+0.1, 0.1)))\n", + " R2_bins = np.concatenate((np.array([0.55]), np.arange(0.65, np.max(R2) + 0.1, 0.1)))\n", "\n", " # Create 2D histograms\n", " plt.figure(figsize=(10, 7))\n", - " \n", + "\n", " # Unsuccessful matches\n", " hist_unsuccessful, _, _ = np.histogram2d(pt[~match], R2[~match], bins=[pt_bins, R2_bins])\n", "\n", " # Normalize the histogram\n", - " hist_unsuccessful = hist_unsuccessful / np.maximum(1, hist_unsuccessful.sum(axis=1, keepdims=True))\n", + " hist_unsuccessful = hist_unsuccessful / np.maximum(\n", + " 1, hist_unsuccessful.sum(axis=1, keepdims=True)\n", + " )\n", "\n", " # Plot the normalized data\n", - " plt.imshow(hist_unsuccessful.T, origin='lower', aspect='auto', extent=[pt_bins[0], pt_bins[-1], R2_bins[0], R2_bins[-1]], cmap='Reds', alpha=0.5)\n", - " plt.colorbar(label='Normalized Count')\n", - " plt.xlabel('VVJet $p_T$')\n", - " plt.ylabel('R2')\n", - " plt.title('Unsuccessful Matches')\n", + " plt.imshow(\n", + " hist_unsuccessful.T,\n", + " origin=\"lower\",\n", + " aspect=\"auto\",\n", + " extent=[pt_bins[0], pt_bins[-1], R2_bins[0], R2_bins[-1]],\n", + " cmap=\"Reds\",\n", + " alpha=0.5,\n", + " )\n", + " plt.colorbar(label=\"Normalized Count\")\n", + " plt.xlabel(\"VVJet $p_T$\")\n", + " plt.ylabel(\"R2\")\n", + " plt.title(\"Unsuccessful Matches\")\n", " plt.show()\n", "\n", " # Successful matches\n", @@ -2978,15 +3028,23 @@ " hist_successful = hist_successful / np.maximum(1, hist_successful.sum(axis=1, keepdims=True))\n", "\n", " # Plot the normalized data\n", - " plt.imshow(hist_successful.T, origin='lower', aspect='auto', extent=[pt_bins[0], pt_bins[-1], R2_bins[0], R2_bins[-1]], cmap='Blues', alpha=0.5)\n", - " plt.colorbar(label='Normalized Count')\n", - " plt.xlabel('VVJet $p_T$')\n", - " plt.ylabel('R2')\n", - " plt.title('Successful Matches')\n", + " plt.imshow(\n", + " hist_successful.T,\n", + " origin=\"lower\",\n", + " aspect=\"auto\",\n", + " extent=[pt_bins[0], pt_bins[-1], R2_bins[0], R2_bins[-1]],\n", + " cmap=\"Blues\",\n", + " alpha=0.5,\n", + " )\n", + " plt.colorbar(label=\"Normalized Count\")\n", + " plt.xlabel(\"VVJet $p_T$\")\n", + " plt.ylabel(\"R2\")\n", + " plt.title(\"Successful Matches\")\n", " plt.show()\n", "\n", + "\n", "# Use the function\n", - "plot_data(data)\n" + "plot_data(data)" ] }, { @@ -2998,18 +3056,20 @@ "source": [ "import numpy as np\n", "\n", + "\n", "def read_data(filename):\n", " data = []\n", - " with open(filename, 'r') as file:\n", + " with open(filename, \"r\") as file:\n", " for line in file:\n", " # Remove brackets and newline, and split by comma\n", - " items = line.replace('[', '').replace(']', '').replace('\\n', '').split(',')\n", + " items = line.replace(\"[\", \"\").replace(\"]\", \"\").replace(\"\\n\", \"\").split(\",\")\n", " # Convert strings to floats and append to data\n", " data.append([float(item) for item in items])\n", " return np.array(data)\n", "\n", + "\n", "# Use the function\n", - "data = read_data('output.txt')\n" + "data = read_data(\"output.txt\")" ] }, { @@ -3078,19 +3138,19 @@ "\n", " # Create a dictionary to store the data and labels\n", " data_dict = {\n", - " 'drj1bb': {'values': [val[0] for val in dr_values], 'color': 'blue', 'label': 'drj1bb'},\n", - " 'drj2bb': {'values': [val[1] for val in dr_values], 'color': 'red', 'label': 
'drj2bb'},\n", - " 'drj1VV': {'values': [val[2] for val in dr_values], 'color': 'green', 'label': 'drj1VV'},\n", - " 'drj2VV': {'values': [val[3] for val in dr_values], 'color': 'purple', 'label': 'drj2VV'}\n", + " \"drj1bb\": {\"values\": [val[0] for val in dr_values], \"color\": \"blue\", \"label\": \"drj1bb\"},\n", + " \"drj2bb\": {\"values\": [val[1] for val in dr_values], \"color\": \"red\", \"label\": \"drj2bb\"},\n", + " \"drj1VV\": {\"values\": [val[2] for val in dr_values], \"color\": \"green\", \"label\": \"drj1VV\"},\n", + " \"drj2VV\": {\"values\": [val[3] for val in dr_values], \"color\": \"purple\", \"label\": \"drj2VV\"},\n", " }\n", "\n", " # Plot individual histograms\n", " plt.figure(figsize=(12, 8))\n", "\n", " for i, key in enumerate(data_dict.keys()):\n", - " plt.subplot(2, 2, i+1)\n", - " plt.hist(data_dict[key]['values'], bins=50, color=data_dict[key]['color'], alpha=0.7)\n", - " plt.title(data_dict[key]['label'] + \" values\")\n", + " plt.subplot(2, 2, i + 1)\n", + " plt.hist(data_dict[key][\"values\"], bins=50, color=data_dict[key][\"color\"], alpha=0.7)\n", + " plt.title(data_dict[key][\"label\"] + \" values\")\n", " plt.xlabel(\"Delta R\")\n", " if i % 2 == 0:\n", " plt.ylabel(\"Number of Events\")\n", @@ -3100,12 +3160,16 @@ "\n", " # Combined histograms\n", " plt.figure(figsize=(12, 8))\n", - " \n", + "\n", " # bb combined\n", " plt.subplot(2, 2, 1)\n", - " plt.hist([data_dict['drj1bb']['values'], data_dict['drj2bb']['values']], \n", - " bins=50, color=[data_dict['drj1bb']['color'], data_dict['drj2bb']['color']], \n", - " label=[data_dict['drj1bb']['label'], data_dict['drj2bb']['label']], alpha=0.7)\n", + " plt.hist(\n", + " [data_dict[\"drj1bb\"][\"values\"], data_dict[\"drj2bb\"][\"values\"]],\n", + " bins=50,\n", + " color=[data_dict[\"drj1bb\"][\"color\"], data_dict[\"drj2bb\"][\"color\"]],\n", + " label=[data_dict[\"drj1bb\"][\"label\"], data_dict[\"drj2bb\"][\"label\"]],\n", + " alpha=0.7,\n", + " )\n", " plt.title(\"Combined drj1bb and drj2bb values\")\n", " plt.xlabel(\"Delta R\")\n", " plt.ylabel(\"Number of Events\")\n", @@ -3113,18 +3177,26 @@ "\n", " # VV combined\n", " plt.subplot(2, 2, 2)\n", - " plt.hist([data_dict['drj1VV']['values'], data_dict['drj2VV']['values']], \n", - " bins=50, color=[data_dict['drj1VV']['color'], data_dict['drj2VV']['color']], \n", - " label=[data_dict['drj1VV']['label'], data_dict['drj2VV']['label']], alpha=0.7)\n", + " plt.hist(\n", + " [data_dict[\"drj1VV\"][\"values\"], data_dict[\"drj2VV\"][\"values\"]],\n", + " bins=50,\n", + " color=[data_dict[\"drj1VV\"][\"color\"], data_dict[\"drj2VV\"][\"color\"]],\n", + " label=[data_dict[\"drj1VV\"][\"label\"], data_dict[\"drj2VV\"][\"label\"]],\n", + " alpha=0.7,\n", + " )\n", " plt.title(\"Combined drj1VV and drj2VV values\")\n", " plt.xlabel(\"Delta R\")\n", " plt.legend()\n", "\n", " # All variables combined\n", " plt.subplot(2, 1, 2)\n", - " plt.hist([data['values'] for data in data_dict.values()], bins=50, \n", - " color=[data['color'] for data in data_dict.values()], \n", - " label=[data['label'] for data in data_dict.values()], alpha=0.7)\n", + " plt.hist(\n", + " [data[\"values\"] for data in data_dict.values()],\n", + " bins=50,\n", + " color=[data[\"color\"] for data in data_dict.values()],\n", + " label=[data[\"label\"] for data in data_dict.values()],\n", + " alpha=0.7,\n", + " )\n", " plt.title(\"All variables combined\")\n", " plt.xlabel(\"Delta R\")\n", " plt.ylabel(\"Number of Events\")\n", @@ -3133,9 +3205,10 @@ " plt.tight_layout()\n", " plt.show()\n", "\n", 
+ "\n", "# The function call remains the same:\n", - "process_and_plot_refactored('output_deltaRtruthinfo.txt')\n", - "process_and_plot_refactored('output_deltaRtruthinfo_bbtagging.txt')" + "process_and_plot_refactored(\"output_deltaRtruthinfo.txt\")\n", + "process_and_plot_refactored(\"output_deltaRtruthinfo_bbtagging.txt\")" ] }, { @@ -3166,6 +3239,7 @@ "source": [ "import matplotlib.pyplot as plt\n", "\n", + "\n", "def plot_2d_histograms_from_file(filename):\n", " # Load the data from the file\n", " with open(filename, \"r\") as f:\n", @@ -3184,49 +3258,50 @@ "\n", " # Plotting\n", " plt.figure(figsize=(15, 10))\n", - " \n", + "\n", " # Hbb with vbf gen quark 1\n", " plt.subplot(2, 2, 1)\n", - " plt.hist2d(delta_R_11, pt_Hbb, bins=50, cmap='inferno')\n", - " plt.colorbar(label='Counts')\n", + " plt.hist2d(delta_R_11, pt_Hbb, bins=50, cmap=\"inferno\")\n", + " plt.colorbar(label=\"Counts\")\n", " plt.title(\"Hbb vs vbf gen quark 1\")\n", " plt.xlabel(\"Delta R\")\n", " plt.ylabel(\"Hbb Pt\")\n", - " \n", + "\n", " # Hbb with vbf gen quark 2\n", " plt.subplot(2, 2, 2)\n", - " plt.hist2d(delta_R_12, pt_Hbb, bins=50, cmap='inferno')\n", - " plt.colorbar(label='Counts')\n", + " plt.hist2d(delta_R_12, pt_Hbb, bins=50, cmap=\"inferno\")\n", + " plt.colorbar(label=\"Counts\")\n", " plt.title(\"Hbb vs vbf gen quark 2\")\n", " plt.xlabel(\"Delta R\")\n", " plt.ylabel(\"Hbb Pt\")\n", - " \n", + "\n", " # HVV with vbf gen quark 1\n", " plt.subplot(2, 2, 3)\n", - " plt.hist2d(delta_R_21, pt_HVV, bins=50, cmap='inferno')\n", - " plt.colorbar(label='Counts')\n", + " plt.hist2d(delta_R_21, pt_HVV, bins=50, cmap=\"inferno\")\n", + " plt.colorbar(label=\"Counts\")\n", " plt.title(\"HVV vs vbf gen quark 1\")\n", " plt.xlabel(\"Delta R\")\n", " plt.ylabel(\"HVV Pt\")\n", - " \n", + "\n", " # HVV with vbf gen quark 2\n", " plt.subplot(2, 2, 4)\n", - " plt.hist2d(delta_R_22, pt_HVV, bins=50, cmap='inferno')\n", - " plt.colorbar(label='Counts')\n", + " plt.hist2d(delta_R_22, pt_HVV, bins=50, cmap=\"inferno\")\n", + " plt.colorbar(label=\"Counts\")\n", " plt.title(\"HVV vs vbf gen quark 2\")\n", " plt.xlabel(\"Delta R\")\n", " plt.ylabel(\"HVV Pt\")\n", - " \n", + "\n", " plt.tight_layout()\n", " plt.show()\n", "\n", + "\n", "# To visualize the histograms, you can call:\n", "# plot_2d_histograms_from_file_updated('output_file.txt')\n", "\n", "# To visualize the histograms, you can call:\n", "# plot_2d_histograms_from_file('output_file.txt')\n", "\n", - "plot_2d_histograms_from_file(\"outputR2vsPtgenHH_posptreq.txt\")\n" + "plot_2d_histograms_from_file(\"outputR2vsPtgenHH_posptreq.txt\")" ] }, { @@ -3271,40 +3346,41 @@ "\n", " # Plotting\n", " plt.figure(figsize=(15, 10))\n", - " \n", + "\n", " # Hbb with vbf gen quark 1\n", " plt.subplot(2, 2, 1)\n", - " plt.hist(delta_R_11, bins=50, color='red', alpha=0.7)\n", + " plt.hist(delta_R_11, bins=50, color=\"red\", alpha=0.7)\n", " plt.title(\"Hbb vs vbf gen quark 1\")\n", " plt.xlabel(\"Delta R\")\n", " plt.ylabel(\"Counts\")\n", - " \n", + "\n", " # Hbb with vbf gen quark 2\n", " plt.subplot(2, 2, 2)\n", - " plt.hist(delta_R_12, bins=50, color='blue', alpha=0.7)\n", + " plt.hist(delta_R_12, bins=50, color=\"blue\", alpha=0.7)\n", " plt.title(\"Hbb vs vbf gen quark 2\")\n", " plt.xlabel(\"Delta R\")\n", " plt.ylabel(\"Counts\")\n", - " \n", + "\n", " # HVV with vbf gen quark 1\n", " plt.subplot(2, 2, 3)\n", - " plt.hist(delta_R_21, bins=50, color='green', alpha=0.7)\n", + " plt.hist(delta_R_21, bins=50, color=\"green\", alpha=0.7)\n", " plt.title(\"HVV vs vbf gen 
quark 1\")\n", " plt.xlabel(\"Delta R\")\n", " plt.ylabel(\"Counts\")\n", - " \n", + "\n", " # HVV with vbf gen quark 2\n", " plt.subplot(2, 2, 4)\n", - " plt.hist(delta_R_22, bins=50, color='purple', alpha=0.7)\n", + " plt.hist(delta_R_22, bins=50, color=\"purple\", alpha=0.7)\n", " plt.title(\"HVV vs vbf gen quark 2\")\n", " plt.xlabel(\"Delta R\")\n", " plt.ylabel(\"Counts\")\n", - " \n", + "\n", " plt.tight_layout()\n", " plt.show()\n", "\n", + "\n", "# Call the function to visualize the histograms\n", - "plot_1d_histograms_from_file(\"outputR2vsPtgenHH_posptreq.txt\")\n" + "plot_1d_histograms_from_file(\"outputR2vsPtgenHH_posptreq.txt\")" ] }, { @@ -3351,28 +3427,27 @@ "\n", " # Plotting\n", " plt.figure(figsize=(15, 10))\n", - " \n", + "\n", " # Tagging score with vbf gen quark 1\n", " plt.subplot(2, 2, 1)\n", - " plt.hist2d(delta_R_11, tagging_score, bins=30, cmap='inferno')\n", - " plt.colorbar(label='Counts')\n", + " plt.hist2d(delta_R_11, tagging_score, bins=30, cmap=\"inferno\")\n", + " plt.colorbar(label=\"Counts\")\n", " plt.title(\"Tagging score vs vbf gen quark 1\")\n", " plt.xlabel(\"Delta R\")\n", " plt.ylabel(\"Tagging Score\")\n", - " \n", + "\n", " # Tagging score with vbf gen quark 2\n", " plt.subplot(2, 2, 2)\n", - " plt.hist2d(delta_R_12, tagging_score, bins=30, cmap='inferno')\n", - " plt.colorbar(label='Counts')\n", + " plt.hist2d(delta_R_12, tagging_score, bins=30, cmap=\"inferno\")\n", + " plt.colorbar(label=\"Counts\")\n", " plt.title(\"Tagging score vs vbf gen quark 2\")\n", " plt.xlabel(\"Delta R\")\n", " plt.ylabel(\"Tagging Score\")\n", - " \n", - " \n", - " \n", + "\n", " plt.tight_layout()\n", " plt.show()\n", "\n", + "\n", "# To visualize the histograms, you can call:\n", "# plot_2d_histograms_with_score('output_file.txt')\n", "plot_2d_histograms_with_score(\"outputR2vsPtgenHH_posptreq.txt\")" @@ -3419,16 +3494,16 @@ "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "\n", - "df = pd.read_parquet('0-30.parquet')\n", + "df = pd.read_parquet(\"0-30.parquet\")\n", "\n", "# Create a mask for Hbb\n", - "Hbb_mask = df[('ak8FatJetParticleNetMD_Txbb', 0)] > df[('ak8FatJetParticleNetMD_Txbb', 1)]\n", + "Hbb_mask = df[(\"ak8FatJetParticleNetMD_Txbb\", 0)] > df[(\"ak8FatJetParticleNetMD_Txbb\", 1)]\n", "\n", "# Extract Hbb and HVV values based on the mask\n", - "Hbb_Txbb_values_0 = df[Hbb_mask][('ak8FatJetParticleNetMD_Txbb', 0)]\n", - "Hbb_Txbb_values_1 = df[~Hbb_mask][('ak8FatJetParticleNetMD_Txbb', 1)]\n", - "HVV_Txbb_values_0 = df[~Hbb_mask][('ak8FatJetParticleNetMD_Txbb', 0)]\n", - "HVV_Txbb_values_1 = df[Hbb_mask][('ak8FatJetParticleNetMD_Txbb', 1)]\n", + "Hbb_Txbb_values_0 = df[Hbb_mask][(\"ak8FatJetParticleNetMD_Txbb\", 0)]\n", + "Hbb_Txbb_values_1 = df[~Hbb_mask][(\"ak8FatJetParticleNetMD_Txbb\", 1)]\n", + "HVV_Txbb_values_0 = df[~Hbb_mask][(\"ak8FatJetParticleNetMD_Txbb\", 0)]\n", + "HVV_Txbb_values_1 = df[Hbb_mask][(\"ak8FatJetParticleNetMD_Txbb\", 1)]\n", "\n", "# Concatenate to get complete values for Hbb and HVV\n", "Hbb_Txbb_values = pd.concat([Hbb_Txbb_values_0, Hbb_Txbb_values_1])\n", @@ -3440,42 +3515,51 @@ " dphi = np.pi - np.abs(np.abs(phi1 - phi2) - np.pi)\n", " return np.sqrt(deta**2 + dphi**2)\n", "\n", + "\n", "# Compute all possible delta R combinations\n", - "delta_R_jet0_gen1 = delta_r(df[('ak8FatJetEta', 0)], df[('ak8FatJetPhi', 0)], df[('vbfetaGen', 0)], df[('vbfphiGen', 0)])\n", - "delta_R_jet0_gen2 = delta_r(df[('ak8FatJetEta', 0)], df[('ak8FatJetPhi', 0)], df[('vbfetaGen', 1)], df[('vbfphiGen', 1)])\n", - "delta_R_jet1_gen1 
= delta_r(df[('ak8FatJetEta', 1)], df[('ak8FatJetPhi', 1)], df[('vbfetaGen', 0)], df[('vbfphiGen', 0)])\n", - "delta_R_jet1_gen2 = delta_r(df[('ak8FatJetEta', 1)], df[('ak8FatJetPhi', 1)], df[('vbfetaGen', 1)], df[('vbfphiGen', 1)])\n", + "delta_R_jet0_gen1 = delta_r(\n", + " df[(\"ak8FatJetEta\", 0)], df[(\"ak8FatJetPhi\", 0)], df[(\"vbfetaGen\", 0)], df[(\"vbfphiGen\", 0)]\n", + ")\n", + "delta_R_jet0_gen2 = delta_r(\n", + " df[(\"ak8FatJetEta\", 0)], df[(\"ak8FatJetPhi\", 0)], df[(\"vbfetaGen\", 1)], df[(\"vbfphiGen\", 1)]\n", + ")\n", + "delta_R_jet1_gen1 = delta_r(\n", + " df[(\"ak8FatJetEta\", 1)], df[(\"ak8FatJetPhi\", 1)], df[(\"vbfetaGen\", 0)], df[(\"vbfphiGen\", 0)]\n", + ")\n", + "delta_R_jet1_gen2 = delta_r(\n", + " df[(\"ak8FatJetEta\", 1)], df[(\"ak8FatJetPhi\", 1)], df[(\"vbfetaGen\", 1)], df[(\"vbfphiGen\", 1)]\n", + ")\n", "\n", "# Assign delta R values to Hbb and HVV based on the Hbb mask\n", - "df['delta_R_Hbb_gen1'] = np.where(Hbb_mask, delta_R_jet0_gen1, delta_R_jet1_gen1)\n", - "df['delta_R_Hbb_gen2'] = np.where(Hbb_mask, delta_R_jet0_gen2, delta_R_jet1_gen2)\n", - "df['delta_R_HVV_gen1'] = np.where(~Hbb_mask, delta_R_jet0_gen1, delta_R_jet1_gen1)\n", - "df['delta_R_HVV_gen2'] = np.where(~Hbb_mask, delta_R_jet0_gen2, delta_R_jet1_gen2)\n", + "df[\"delta_R_Hbb_gen1\"] = np.where(Hbb_mask, delta_R_jet0_gen1, delta_R_jet1_gen1)\n", + "df[\"delta_R_Hbb_gen2\"] = np.where(Hbb_mask, delta_R_jet0_gen2, delta_R_jet1_gen2)\n", + "df[\"delta_R_HVV_gen1\"] = np.where(~Hbb_mask, delta_R_jet0_gen1, delta_R_jet1_gen1)\n", + "df[\"delta_R_HVV_gen2\"] = np.where(~Hbb_mask, delta_R_jet0_gen2, delta_R_jet1_gen2)\n", "\n", "\n", "# Plot histograms\n", "plt.figure(figsize=(15, 10))\n", "\n", "plt.subplot(2, 2, 1)\n", - "plt.hist(df['delta_R_Hbb_gen1'], bins=50, color='red', alpha=0.7)\n", + "plt.hist(df[\"delta_R_Hbb_gen1\"], bins=50, color=\"red\", alpha=0.7)\n", "plt.title(\"Hbb vs vbf gen quark 1\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"Counts\")\n", "\n", "plt.subplot(2, 2, 2)\n", - "plt.hist(df['delta_R_Hbb_gen2'], bins=50, color='blue', alpha=0.7)\n", + "plt.hist(df[\"delta_R_Hbb_gen2\"], bins=50, color=\"blue\", alpha=0.7)\n", "plt.title(\"Hbb vs vbf gen quark 2\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"Counts\")\n", "\n", "plt.subplot(2, 2, 3)\n", - "plt.hist(df['delta_R_HVV_gen1'], bins=50, color='green', alpha=0.7)\n", + "plt.hist(df[\"delta_R_HVV_gen1\"], bins=50, color=\"green\", alpha=0.7)\n", "plt.title(\"HVV vs vbf gen quark 1\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"Counts\")\n", "\n", "plt.subplot(2, 2, 4)\n", - "plt.hist(df['delta_R_HVV_gen2'], bins=50, color='purple', alpha=0.7)\n", + "plt.hist(df[\"delta_R_HVV_gen2\"], bins=50, color=\"purple\", alpha=0.7)\n", "plt.title(\"HVV vs vbf gen quark 2\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"Counts\")\n", @@ -3488,7 +3572,7 @@ "print(\"Number of HVV jets:\", len(HVV_Txbb_values))\n", "print(\"Total number of events:\", len(df))\n", "print(\"Average for Hbb:\", Hbb_Txbb_values.mean())\n", - "print(\"Average for HVV:\", HVV_Txbb_values.mean())\n" + "print(\"Average for HVV:\", HVV_Txbb_values.mean())" ] }, { @@ -3537,12 +3621,11 @@ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "\n", - "df = pd.read_parquet('0-30.parquet')\n", + "df = pd.read_parquet(\"0-30.parquet\")\n", "\n", "\n", "# Create a mask for Hbb\n", - "Hbb_mask = df[('ak8FatJetParticleNetMD_Txbb', 0)] > df[('ak8FatJetParticleNetMD_Txbb', 1)]\n", - "\n", + "Hbb_mask = df[(\"ak8FatJetParticleNetMD_Txbb\", 0)] > 
df[(\"ak8FatJetParticleNetMD_Txbb\", 1)]\n", "\n", "\n", "# Compute delta R for each combination\n", @@ -3551,64 +3634,76 @@ " dphi = np.pi - np.abs(np.abs(phi1 - phi2) - np.pi)\n", " return np.sqrt(deta**2 + dphi**2)\n", "\n", + "\n", "# Compute all possible delta R combinations\n", - "delta_R_jet0_gen1 = delta_r(df[('ak8FatJetEta', 0)], df[('ak8FatJetPhi', 0)], df[('vbfetaGen', 0)], df[('vbfphiGen', 0)])\n", - "delta_R_jet0_gen2 = delta_r(df[('ak8FatJetEta', 0)], df[('ak8FatJetPhi', 0)], df[('vbfetaGen', 1)], df[('vbfphiGen', 1)])\n", - "delta_R_jet1_gen1 = delta_r(df[('ak8FatJetEta', 1)], df[('ak8FatJetPhi', 1)], df[('vbfetaGen', 0)], df[('vbfphiGen', 0)])\n", - "delta_R_jet1_gen2 = delta_r(df[('ak8FatJetEta', 1)], df[('ak8FatJetPhi', 1)], df[('vbfetaGen', 1)], df[('vbfphiGen', 1)])\n", + "delta_R_jet0_gen1 = delta_r(\n", + " df[(\"ak8FatJetEta\", 0)], df[(\"ak8FatJetPhi\", 0)], df[(\"vbfetaGen\", 0)], df[(\"vbfphiGen\", 0)]\n", + ")\n", + "delta_R_jet0_gen2 = delta_r(\n", + " df[(\"ak8FatJetEta\", 0)], df[(\"ak8FatJetPhi\", 0)], df[(\"vbfetaGen\", 1)], df[(\"vbfphiGen\", 1)]\n", + ")\n", + "delta_R_jet1_gen1 = delta_r(\n", + " df[(\"ak8FatJetEta\", 1)], df[(\"ak8FatJetPhi\", 1)], df[(\"vbfetaGen\", 0)], df[(\"vbfphiGen\", 0)]\n", + ")\n", + "delta_R_jet1_gen2 = delta_r(\n", + " df[(\"ak8FatJetEta\", 1)], df[(\"ak8FatJetPhi\", 1)], df[(\"vbfetaGen\", 1)], df[(\"vbfphiGen\", 1)]\n", + ")\n", "\n", "# Assign delta R values to Hbb and HVV based on the Hbb mask\n", - "df['delta_R_Hbb_gen1'] = np.where(Hbb_mask, delta_R_jet0_gen1, delta_R_jet1_gen1)\n", - "df['delta_R_Hbb_gen2'] = np.where(Hbb_mask, delta_R_jet0_gen2, delta_R_jet1_gen2)\n", - "df['delta_R_HVV_gen1'] = np.where(~Hbb_mask, delta_R_jet0_gen1, delta_R_jet1_gen1)\n", - "df['delta_R_HVV_gen2'] = np.where(~Hbb_mask, delta_R_jet0_gen2, delta_R_jet1_gen2)\n", + "df[\"delta_R_Hbb_gen1\"] = np.where(Hbb_mask, delta_R_jet0_gen1, delta_R_jet1_gen1)\n", + "df[\"delta_R_Hbb_gen2\"] = np.where(Hbb_mask, delta_R_jet0_gen2, delta_R_jet1_gen2)\n", + "df[\"delta_R_HVV_gen1\"] = np.where(~Hbb_mask, delta_R_jet0_gen1, delta_R_jet1_gen1)\n", + "df[\"delta_R_HVV_gen2\"] = np.where(~Hbb_mask, delta_R_jet0_gen2, delta_R_jet1_gen2)\n", "\n", "\n", "# Correctly extract the Txbb values for Hbb\n", - "Hbb_Txbb_values = np.where(Hbb_mask, df[('ak8FatJetParticleNetMD_Txbb', 0)], df[('ak8FatJetParticleNetMD_Txbb', 1)])\n", + "Hbb_Txbb_values = np.where(\n", + " Hbb_mask, df[(\"ak8FatJetParticleNetMD_Txbb\", 0)], df[(\"ak8FatJetParticleNetMD_Txbb\", 1)]\n", + ")\n", "\n", "# Correctly extract the Th4q values for HVV\n", - "#HVV_Th4q_values = np.where(~Hbb_mask, df[('ak8FatJetParTMD_THWWvsT', 0)], df[('ak8FatJetParTMD_THWWvsT', 1)])\n", - "HVV_Th4q_values = np.where(~Hbb_mask, df[('ak8FatJetParticleNet_Th4q', 0)], df[('ak8FatJetParticleNet_Th4q', 1)])\n", + "# HVV_Th4q_values = np.where(~Hbb_mask, df[('ak8FatJetParTMD_THWWvsT', 0)], df[('ak8FatJetParTMD_THWWvsT', 1)])\n", + "HVV_Th4q_values = np.where(\n", + " ~Hbb_mask, df[(\"ak8FatJetParticleNet_Th4q\", 0)], df[(\"ak8FatJetParticleNet_Th4q\", 1)]\n", + ")\n", "\n", "# Apply selection criteria\n", "selection_mask = (Hbb_Txbb_values >= 0.95) & (HVV_Th4q_values >= 0.6)\n", - "#selection_mask = (Hbb_Txbb_values>= 0.8)\n", + "# selection_mask = (Hbb_Txbb_values>= 0.8)\n", "df = df[selection_mask]\n", "Hbb_Txbb_values = Hbb_Txbb_values[selection_mask]\n", "HVV_Th4q_values = HVV_Th4q_values[selection_mask]\n", "\n", "# Extract Delta R values for the events satisfying the mask conditions\n", - 
"delta_R_Hbb_gen1_values = df['delta_R_Hbb_gen1']\n", - "delta_R_Hbb_gen2_values = df['delta_R_Hbb_gen2']\n", - "delta_R_HVV_gen1_values = df['delta_R_HVV_gen1']\n", - "delta_R_HVV_gen2_values = df['delta_R_HVV_gen2']\n", - "\n", + "delta_R_Hbb_gen1_values = df[\"delta_R_Hbb_gen1\"]\n", + "delta_R_Hbb_gen2_values = df[\"delta_R_Hbb_gen2\"]\n", + "delta_R_HVV_gen1_values = df[\"delta_R_HVV_gen1\"]\n", + "delta_R_HVV_gen2_values = df[\"delta_R_HVV_gen2\"]\n", "\n", "\n", "# Plot histograms 1d histograms\n", "plt.figure(figsize=(15, 10))\n", "\n", "plt.subplot(2, 2, 1)\n", - "plt.hist(df['delta_R_Hbb_gen1'], bins=50, color='red', alpha=0.7)\n", + "plt.hist(df[\"delta_R_Hbb_gen1\"], bins=50, color=\"red\", alpha=0.7)\n", "plt.title(\"Hbb vs vbf gen quark 1\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"Counts\")\n", "\n", "plt.subplot(2, 2, 2)\n", - "plt.hist(df['delta_R_Hbb_gen2'], bins=50, color='blue', alpha=0.7)\n", + "plt.hist(df[\"delta_R_Hbb_gen2\"], bins=50, color=\"blue\", alpha=0.7)\n", "plt.title(\"Hbb vs vbf gen quark 2\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"Counts\")\n", "\n", "plt.subplot(2, 2, 3)\n", - "plt.hist(df['delta_R_HVV_gen1'], bins=50, color='green', alpha=0.7)\n", + "plt.hist(df[\"delta_R_HVV_gen1\"], bins=50, color=\"green\", alpha=0.7)\n", "plt.title(\"HVV vs vbf gen quark 1\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"Counts\")\n", "\n", "plt.subplot(2, 2, 4)\n", - "plt.hist(df['delta_R_HVV_gen2'], bins=50, color='purple', alpha=0.7)\n", + "plt.hist(df[\"delta_R_HVV_gen2\"], bins=50, color=\"purple\", alpha=0.7)\n", "plt.title(\"HVV vs vbf gen quark 2\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"Counts\")\n", @@ -3617,39 +3712,37 @@ "plt.show()\n", "\n", "\n", - "\n", - "\n", "# Plotting heatmaps\n", "plt.figure(figsize=(15, 10))\n", "\n", "# Hbb with vbf gen quark 1\n", "plt.subplot(2, 2, 1)\n", - "plt.hist2d(delta_R_Hbb_gen1_values, Hbb_Txbb_values, bins=50, cmap='inferno')\n", - "plt.colorbar(label='Counts')\n", + "plt.hist2d(delta_R_Hbb_gen1_values, Hbb_Txbb_values, bins=50, cmap=\"inferno\")\n", + "plt.colorbar(label=\"Counts\")\n", "plt.title(\"Hbb vs vbf gen quark 1\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"ParticleNet Score for Hbb\")\n", "\n", "# Hbb with vbf gen quark 2\n", "plt.subplot(2, 2, 2)\n", - "plt.hist2d(delta_R_Hbb_gen2_values, Hbb_Txbb_values, bins=50, cmap='inferno')\n", - "plt.colorbar(label='Counts')\n", + "plt.hist2d(delta_R_Hbb_gen2_values, Hbb_Txbb_values, bins=50, cmap=\"inferno\")\n", + "plt.colorbar(label=\"Counts\")\n", "plt.title(\"Hbb vs vbf gen quark 2\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"ParticleNet Score for Hbb\")\n", "\n", "# HVV with vbf gen quark 1\n", "plt.subplot(2, 2, 3)\n", - "plt.hist2d(delta_R_HVV_gen1_values, HVV_Th4q_values, bins=50, cmap='inferno')\n", - "plt.colorbar(label='Counts')\n", + "plt.hist2d(delta_R_HVV_gen1_values, HVV_Th4q_values, bins=50, cmap=\"inferno\")\n", + "plt.colorbar(label=\"Counts\")\n", "plt.title(\"HVV vs vbf gen quark 1\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"ParticleNet Score for HVV\")\n", "\n", "# HVV with vbf gen quark 2\n", "plt.subplot(2, 2, 4)\n", - "plt.hist2d(delta_R_HVV_gen2_values, HVV_Th4q_values, bins=50, cmap='inferno')\n", - "plt.colorbar(label='Counts')\n", + "plt.hist2d(delta_R_HVV_gen2_values, HVV_Th4q_values, bins=50, cmap=\"inferno\")\n", + "plt.colorbar(label=\"Counts\")\n", "plt.title(\"HVV vs vbf gen quark 2\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"ParticleNet Score for HVV\")\n", @@ -3657,10 +3750,7 @@ 
"plt.tight_layout()\n", "plt.show()\n", "\n", - "print(HVV_Th4q_values)\n", - "\n", - "\n", - "\n" + "print(HVV_Th4q_values)" ] }, { @@ -3699,19 +3789,16 @@ } ], "source": [ - "import numpy as np # turn this into R1 R2 graphs\n", + "import numpy as np # turn this into R1 R2 graphs\n", "import matplotlib.pyplot as plt\n", "\n", - "df = pd.read_parquet('0-30.parquet')\n", + "df = pd.read_parquet(\"0-30.parquet\")\n", "\n", "# Create a mask for Hbb\n", - "Hbb_gen_mask = df[('GenHiggsChildren', 0)] == 5\n", + "Hbb_gen_mask = df[(\"GenHiggsChildren\", 0)] == 5\n", "\n", "# Create a mask for Hbb\n", - "Hbb_mask = df[('ak8FatJetParticleNetMD_Txbb', 0)] > df[('ak8FatJetParticleNetMD_Txbb', 1)]\n", - "\n", - "\n", - "\n", + "Hbb_mask = df[(\"ak8FatJetParticleNetMD_Txbb\", 0)] > df[(\"ak8FatJetParticleNetMD_Txbb\", 1)]\n", "\n", "\n", "# Compute delta R for each combination\n", @@ -3720,31 +3807,44 @@ " dphi = np.pi - np.abs(np.abs(phi1 - phi2) - np.pi)\n", " return np.sqrt(deta**2 + dphi**2)\n", "\n", + "\n", "# Compute all possible delta R combinations\n", - "delta_R_jet0_gen1 = delta_r(df[('GenHiggsEta', 0)], df[('GenHiggsPhi', 0)], df[('vbfetaGen', 0)], df[('vbfphiGen', 0)])\n", - "delta_R_jet0_gen2 = delta_r(df[('GenHiggsEta', 0)], df[('GenHiggsPhi', 0)], df[('vbfetaGen', 1)], df[('vbfphiGen', 1)])\n", - "delta_R_jet1_gen1 = delta_r(df[('GenHiggsEta', 1)], df[('GenHiggsPhi', 1)], df[('vbfetaGen', 0)], df[('vbfphiGen', 0)])\n", - "delta_R_jet1_gen2 = delta_r(df[('GenHiggsEta', 1)], df[('GenHiggsPhi', 1)], df[('vbfetaGen', 1)], df[('vbfphiGen', 1)])\n", + "delta_R_jet0_gen1 = delta_r(\n", + " df[(\"GenHiggsEta\", 0)], df[(\"GenHiggsPhi\", 0)], df[(\"vbfetaGen\", 0)], df[(\"vbfphiGen\", 0)]\n", + ")\n", + "delta_R_jet0_gen2 = delta_r(\n", + " df[(\"GenHiggsEta\", 0)], df[(\"GenHiggsPhi\", 0)], df[(\"vbfetaGen\", 1)], df[(\"vbfphiGen\", 1)]\n", + ")\n", + "delta_R_jet1_gen1 = delta_r(\n", + " df[(\"GenHiggsEta\", 1)], df[(\"GenHiggsPhi\", 1)], df[(\"vbfetaGen\", 0)], df[(\"vbfphiGen\", 0)]\n", + ")\n", + "delta_R_jet1_gen2 = delta_r(\n", + " df[(\"GenHiggsEta\", 1)], df[(\"GenHiggsPhi\", 1)], df[(\"vbfetaGen\", 1)], df[(\"vbfphiGen\", 1)]\n", + ")\n", "\n", "# Assign delta R values to Hbb and HVV based on the Hbb mask\n", - "df['delta_R_Hbb_gen1'] = np.where(Hbb_gen_mask, delta_R_jet0_gen1, delta_R_jet1_gen1)\n", - "df['delta_R_Hbb_gen2'] = np.where(Hbb_gen_mask, delta_R_jet0_gen2, delta_R_jet1_gen2)\n", - "df['delta_R_HVV_gen1'] = np.where(~Hbb_gen_mask, delta_R_jet0_gen1, delta_R_jet1_gen1)\n", - "df['delta_R_HVV_gen2'] = np.where(~Hbb_gen_mask, delta_R_jet0_gen2, delta_R_jet1_gen2)\n", + "df[\"delta_R_Hbb_gen1\"] = np.where(Hbb_gen_mask, delta_R_jet0_gen1, delta_R_jet1_gen1)\n", + "df[\"delta_R_Hbb_gen2\"] = np.where(Hbb_gen_mask, delta_R_jet0_gen2, delta_R_jet1_gen2)\n", + "df[\"delta_R_HVV_gen1\"] = np.where(~Hbb_gen_mask, delta_R_jet0_gen1, delta_R_jet1_gen1)\n", + "df[\"delta_R_HVV_gen2\"] = np.where(~Hbb_gen_mask, delta_R_jet0_gen2, delta_R_jet1_gen2)\n", "\n", "# Assign delta R values to Hbb and HVV based on the Hbb mask\n", - "df['delta_R_Hbb_gen1'] = np.where(Hbb_mask, delta_R_jet0_gen1, delta_R_jet1_gen1)\n", - "df['delta_R_Hbb_gen2'] = np.where(Hbb_mask, delta_R_jet0_gen2, delta_R_jet1_gen2)\n", - "df['delta_R_HVV_gen1'] = np.where(~Hbb_mask, delta_R_jet0_gen1, delta_R_jet1_gen1)\n", - "df['delta_R_HVV_gen2'] = np.where(~Hbb_mask, delta_R_jet0_gen2, delta_R_jet1_gen2)\n", + "df[\"delta_R_Hbb_gen1\"] = np.where(Hbb_mask, delta_R_jet0_gen1, delta_R_jet1_gen1)\n", + 
"df[\"delta_R_Hbb_gen2\"] = np.where(Hbb_mask, delta_R_jet0_gen2, delta_R_jet1_gen2)\n", + "df[\"delta_R_HVV_gen1\"] = np.where(~Hbb_mask, delta_R_jet0_gen1, delta_R_jet1_gen1)\n", + "df[\"delta_R_HVV_gen2\"] = np.where(~Hbb_mask, delta_R_jet0_gen2, delta_R_jet1_gen2)\n", "\n", "\n", "# Correctly extract the Txbb values for Hbb\n", - "Hbb_Txbb_values = np.where(Hbb_mask, df[('ak8FatJetParticleNetMD_Txbb', 0)], df[('ak8FatJetParticleNetMD_Txbb', 1)])\n", + "Hbb_Txbb_values = np.where(\n", + " Hbb_mask, df[(\"ak8FatJetParticleNetMD_Txbb\", 0)], df[(\"ak8FatJetParticleNetMD_Txbb\", 1)]\n", + ")\n", "\n", "# Correctly extract the Th4q values for HVV\n", - "#HVV_Th4q_values = np.where(~Hbb_mask, df[('ak8FatJetParTMD_THWWvsT', 0)], df[('ak8FatJetParTMD_THWWvsT', 1)])\n", - "HVV_Th4q_values = np.where(~Hbb_mask, df[('ak8FatJetParticleNet_Th4q', 0)], df[('ak8FatJetParticleNet_Th4q', 1)])\n", + "# HVV_Th4q_values = np.where(~Hbb_mask, df[('ak8FatJetParTMD_THWWvsT', 0)], df[('ak8FatJetParTMD_THWWvsT', 1)])\n", + "HVV_Th4q_values = np.where(\n", + " ~Hbb_mask, df[(\"ak8FatJetParticleNet_Th4q\", 0)], df[(\"ak8FatJetParticleNet_Th4q\", 1)]\n", + ")\n", "\n", "# Apply selection criteria\n", "if False:\n", @@ -3752,43 +3852,43 @@ " df = df[selection_mask]\n", " Hbb_Txbb_values = Hbb_Txbb_values[selection_mask]\n", " HVV_Th4q_values = HVV_Th4q_values[selection_mask]\n", - " \n", - "selection_mask = (Hbb_Txbb_values >= 0.8)\n", + "\n", + "selection_mask = Hbb_Txbb_values >= 0.8\n", "df = df[selection_mask]\n", "Hbb_Txbb_values = Hbb_Txbb_values[selection_mask]\n", "HVV_Th4q_values = HVV_Th4q_values[selection_mask]\n", "print(HVV_Th4q_values)\n", "\n", "# Extract Delta R values for the events satisfying the mask conditions\n", - "delta_R_Hbb_gen1_values = df['delta_R_Hbb_gen1']\n", - "delta_R_Hbb_gen2_values = df['delta_R_Hbb_gen2']\n", - "delta_R_HVV_gen1_values = df['delta_R_HVV_gen1']\n", - "delta_R_HVV_gen2_values = df['delta_R_HVV_gen2']\n", + "delta_R_Hbb_gen1_values = df[\"delta_R_Hbb_gen1\"]\n", + "delta_R_Hbb_gen2_values = df[\"delta_R_Hbb_gen2\"]\n", + "delta_R_HVV_gen1_values = df[\"delta_R_HVV_gen1\"]\n", + "delta_R_HVV_gen2_values = df[\"delta_R_HVV_gen2\"]\n", "\n", "\n", "# Plot histograms 1d histograms\n", "plt.figure(figsize=(15, 10))\n", "\n", "plt.subplot(2, 2, 1)\n", - "plt.hist(df['delta_R_Hbb_gen1'], bins=50, color='red', alpha=0.7)\n", + "plt.hist(df[\"delta_R_Hbb_gen1\"], bins=50, color=\"red\", alpha=0.7)\n", "plt.title(\"Hbb vs vbf gen quark 1\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"Counts\")\n", "\n", "plt.subplot(2, 2, 2)\n", - "plt.hist(df['delta_R_Hbb_gen2'], bins=50, color='blue', alpha=0.7)\n", + "plt.hist(df[\"delta_R_Hbb_gen2\"], bins=50, color=\"blue\", alpha=0.7)\n", "plt.title(\"Hbb vs vbf gen quark 2\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"Counts\")\n", "\n", "plt.subplot(2, 2, 3)\n", - "plt.hist(df['delta_R_HVV_gen1'], bins=50, color='green', alpha=0.7)\n", + "plt.hist(df[\"delta_R_HVV_gen1\"], bins=50, color=\"green\", alpha=0.7)\n", "plt.title(\"HVV vs vbf gen quark 1\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"Counts\")\n", "\n", "plt.subplot(2, 2, 4)\n", - "plt.hist(df['delta_R_HVV_gen2'], bins=50, color='purple', alpha=0.7)\n", + "plt.hist(df[\"delta_R_HVV_gen2\"], bins=50, color=\"purple\", alpha=0.7)\n", "plt.title(\"HVV vs vbf gen quark 2\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"Counts\")\n", @@ -3797,45 +3897,43 @@ "plt.show()\n", "\n", "\n", - "\n", - "\n", "# Plotting heatmaps\n", "plt.figure(figsize=(15, 10))\n", "\n", "# 
Hbb with vbf gen quark 1\n", "plt.subplot(2, 2, 1)\n", - "plt.hist2d(delta_R_Hbb_gen1_values, Hbb_Txbb_values, bins=50, cmap='inferno')\n", - "plt.colorbar(label='Counts')\n", + "plt.hist2d(delta_R_Hbb_gen1_values, Hbb_Txbb_values, bins=50, cmap=\"inferno\")\n", + "plt.colorbar(label=\"Counts\")\n", "plt.title(\"gen Hbb vs vbf gen quark 1\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"ParticleNet Score for Hbb\")\n", "\n", "# Hbb with vbf gen quark 2\n", "plt.subplot(2, 2, 2)\n", - "plt.hist2d(delta_R_Hbb_gen2_values, Hbb_Txbb_values, bins=50, cmap='inferno')\n", - "plt.colorbar(label='Counts')\n", + "plt.hist2d(delta_R_Hbb_gen2_values, Hbb_Txbb_values, bins=50, cmap=\"inferno\")\n", + "plt.colorbar(label=\"Counts\")\n", "plt.title(\"gen Hbb vs vbf gen quark 2\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"ParticleNet Score for Hbb\")\n", "\n", "# HVV with vbf gen quark 1\n", "plt.subplot(2, 2, 3)\n", - "plt.hist2d(delta_R_HVV_gen1_values, HVV_Th4q_values, bins=50, cmap='inferno')\n", - "plt.colorbar(label='Counts')\n", + "plt.hist2d(delta_R_HVV_gen1_values, HVV_Th4q_values, bins=50, cmap=\"inferno\")\n", + "plt.colorbar(label=\"Counts\")\n", "plt.title(\"gen HVV vs vbf gen quark 1\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"ParticleNet Score for HVV\")\n", "\n", "# HVV with vbf gen quark 2\n", "plt.subplot(2, 2, 4)\n", - "plt.hist2d(delta_R_HVV_gen2_values, HVV_Th4q_values, bins=50, cmap='inferno')\n", - "plt.colorbar(label='Counts')\n", + "plt.hist2d(delta_R_HVV_gen2_values, HVV_Th4q_values, bins=50, cmap=\"inferno\")\n", + "plt.colorbar(label=\"Counts\")\n", "plt.title(\"gen HVV vs vbf gen quark 2\")\n", "plt.xlabel(\"Delta R\")\n", "plt.ylabel(\"ParticleNet Score for HVV\")\n", "\n", "plt.tight_layout()\n", - "plt.show()\n" + "plt.show()" ] }, { @@ -4294,22 +4392,32 @@ } ], "source": [ - "df = pd.read_parquet('0-14R1R2study.parquet')\n", - "#df = pd.read_parquet('0--1.parquet')\n", - "#df = pd.read_pickle('outfiles/0-2.pkl')\n", - "#print(df)\n", + "df = pd.read_parquet(\"0-14R1R2study.parquet\")\n", + "# df = pd.read_parquet('0--1.parquet')\n", + "# df = pd.read_pickle('outfiles/0-2.pkl')\n", + "# print(df)\n", "print(df.columns.values.tolist())\n", - "df[('vbfR10.3R20.6', 0)]\n", - "\n", - "Hbb_mask = df[('ak8FatJetParticleNetMD_Txbb', 0)] > df[('ak8FatJetParticleNetMD_Txbb', 1)]\n", - "Hbb_Txbb_values = np.where(Hbb_mask, df[('ak8FatJetParticleNetMD_Txbb', 0)], df[('ak8FatJetParticleNetMD_Txbb', 1)])\n", - "HVV_Th4q_values = np.where(~Hbb_mask, df[('ak8FatJetParticleNet_Th4q', 0)], df[('ak8FatJetParticleNet_Th4q', 1)])\n", - "m = (Hbb_Txbb_values >= 0.95) & (HVV_Th4q_values >= 0.6) & (df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodJets', 0)] == 0)\n", - "#m = m & (np.abs(df[('vbfeta', 1)]) > 1.5) & (np.abs(df[('vbfeta', 0)]) > 1.5)\n", + "df[(\"vbfR10.3R20.6\", 0)]\n", + "\n", + "Hbb_mask = df[(\"ak8FatJetParticleNetMD_Txbb\", 0)] > df[(\"ak8FatJetParticleNetMD_Txbb\", 1)]\n", + "Hbb_Txbb_values = np.where(\n", + " Hbb_mask, df[(\"ak8FatJetParticleNetMD_Txbb\", 0)], df[(\"ak8FatJetParticleNetMD_Txbb\", 1)]\n", + ")\n", + "HVV_Th4q_values = np.where(\n", + " ~Hbb_mask, df[(\"ak8FatJetParticleNet_Th4q\", 0)], df[(\"ak8FatJetParticleNet_Th4q\", 1)]\n", + ")\n", + "m = (\n", + " (Hbb_Txbb_values >= 0.95)\n", + " & (HVV_Th4q_values >= 0.6)\n", + " & (df[(\"nGoodMuons\", 0)] == 0)\n", + " & (df[(\"nGoodElectrons\", 0)] == 0)\n", + " & (df[(\"nGoodJets\", 0)] == 0)\n", + ")\n", + "# m = m & (np.abs(df[('vbfeta', 1)]) > 1.5) & (np.abs(df[('vbfeta', 
0)]) > 1.5)\n",
 "print(len(df))\n",
 "df = df[m]\n",
 "len(df)\n",
- "np.sum(df['weight_noxsec'])"
+ "np.sum(df[\"weight_noxsec\"])"
 ]
 },
 {
@@ -4334,63 +4442,70 @@
 "import matplotlib.pyplot as plt\n",
 "import re\n",
 "\n",
+ "\n",
 "def count_matched_jets(df):\n",
 "    results = []\n",
 "    for column in df.columns:\n",
 "        # Extract columns with R1...R2 format\n",
- "        #print(column, 'R1' in str(column) and 'R2' in str(column))\n",
- "        if 'vbfR' in str(column):\n",
- "            \n",
+ "        # print(column, 'R1' in str(column) and 'R2' in str(column))\n",
+ "        if \"vbfR\" in str(column):\n",
+ "\n",
 "            l = len(df[column])\n",
- "            \n",
+ "\n",
 "            # Use regex to extract R1 and R2 values from column name\n",
- "            match = re.search(r'R1([\\d\\.]+)R2([\\d\\.]+)', str(column))\n",
+ "            match = re.search(r\"R1([\\d\\.]+)R2([\\d\\.]+)\", str(column))\n",
 "            if match:\n",
- "                \n",
+ "\n",
 "                R1 = float(match.group(1))\n",
 "                R2 = float(match.group(2))\n",
- "                \n",
+ "\n",
 "                if R2 < 0.6 or R1 < 0.6:\n",
 "                    continue\n",
- "                \n",
+ "\n",
 "                zero_count = np.sum(df[column] == 0)\n",
 "                one_count = np.sum(df[column] == 1)\n",
 "                two_count = np.sum(df[column] == 2)\n",
- "                #print(zero_count,one_count,two_count)\n",
- "                results.append([(one_count + two_count), R1, R2]) # adjust this to see just two count and stuff\n",
- "                #print([(two_count), R1, R2])\n",
+ "                # print(zero_count,one_count,two_count)\n",
+ "                results.append(\n",
+ "                    [(one_count + two_count), R1, R2]\n",
+ "                )  # use two_count alone here to require both VBF jets to be gen-matched\n",
+ "                # print([(two_count), R1, R2])\n",
 "    return np.array(results)\n",
 "\n",
 "\n",
- "def plot_heatmap(data, title='Num preselected events with 1 or 2 gen quark matched vbf jets'):\n",
+ "def plot_heatmap(data, title=\"Num preselected events with 1 or 2 gen quark matched vbf jets\"):\n",
 "\n",
- "    \n",
 "    # Define grid\n",
 "    R1_vals = sorted(list(set(data[:, 1])))\n",
 "    R2_vals = sorted(list(set(data[:, 2])))\n",
- "    \n",
+ "\n",
 "    heatmap_data = np.zeros((len(R2_vals), len(R1_vals)))\n",
- "    \n",
+ "\n",
 "    for row in data:\n",
 "        r1_idx = R1_vals.index(row[1])\n",
 "        r2_idx = R2_vals.index(row[2])\n",
 "        heatmap_data[r2_idx, r1_idx] = row[0]\n",
- "    \n",
 "    # Plotting the heatmap: rows of heatmap_data are R2, columns are R1, so x spans R1 and y spans R2\n",
 "    plt.figure(figsize=(8, 6))\n",
- "    plt.imshow(heatmap_data, aspect='auto', origin='lower', cmap='viridis', extent=[R2_vals[0], R2_vals[-1], R1_vals[0], R1_vals[-1]])\n",
- "    plt.colorbar(label='# counted from total events passing selection criteria')\n",
- "    plt.ylabel('R2 (HVV jet seperation requirement)')\n",
- "    plt.xlabel('R1 (Hbb jet seperation requirement)')\n",
+ "    plt.imshow(\n",
+ "        heatmap_data,\n",
+ "        aspect=\"auto\",\n",
+ "        origin=\"lower\",\n",
+ "        cmap=\"viridis\",\n",
+ "        extent=[R1_vals[0], R1_vals[-1], R2_vals[0], R2_vals[-1]],\n",
+ "    )\n",
+ "    plt.colorbar(label=\"# counted from total events passing selection criteria\")\n",
+ "    plt.ylabel(\"R2 (HVV jet separation requirement)\")\n",
+ "    plt.xlabel(\"R1 (Hbb jet separation requirement)\")\n",
 "    plt.title(title)\n",
 "    plt.xticks(R1_vals)\n",
 "    plt.yticks(R2_vals)\n",
 "    plt.show()\n",
- "    \n",
- "results = count_matched_jets(df)\n",
- "plot_heatmap(results)\n",
 "\n",
- "\n"
+ "\n",
+ "results = count_matched_jets(df)\n",
+ "plot_heatmap(results)"
 ]
 },
 {
diff --git a/src/HHbbVV/VBF_binder/VBFjetsVisualizePrototype.ipynb b/src/HHbbVV/VBF_binder/VBFjetsVisualizePrototype.ipynb
index 91cf0870..f0e9a57f 100644
--- a/src/HHbbVV/VBF_binder/VBFjetsVisualizePrototype.ipynb
+++ b/src/HHbbVV/VBF_binder/VBFjetsVisualizePrototype.ipynb
@@ -48,36 +48,75 @@
 }
 ],
 "source": [
- "df = 
pd.read_parquet('0--1.parquet')\n",
- "#df = pd.read_pickle('outfiles/0-2.pkl')\n",
- "#print(df)\n",
+ "df = pd.read_parquet(\"0--1.parquet\")\n",
+ "# df = pd.read_pickle('outfiles/0-2.pkl')\n",
+ "# print(df)\n",
 "print(df.columns.values.tolist())\n",
 "\n",
- "#df[('ak8FatJetParticleNet_Th4q', 0)]\n",
- "#df[('ak8FatJetParticleNetMD_Txbb', 0)]\n",
- "#df[('ak8FatJetParTMD_THWWvsT', 0)]\n",
- "#df[('GenHiggsChildren', 1)]\n",
- "\n",
- "#print(df_sorted_pt[('vbf_cos1_j1', 0)])\n",
- "#print(df_sorted_pt[('vbfeta', 0)])\n",
- "print(len(df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJets', 0)] >= 2)& (df[('nGoodJets', 0)] == 0)]))\n",
- "print(len(df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[(\"vbfNumMatchedGen\",0)] == 2)& (df[('nGoodJets', 0)] == 0)]))\n",
- "print(len(df), np.sum(df[(\"vbfNumMatchedGen\",0)] == 2))\n",
- "\n",
- "\n",
- "print(len(df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJets', 0)] >= 2)& (df[('nGoodJets', 0)] == 0) & (np.abs(df[('vbfeta', 0)]) >= 1.5) & (np.abs(df[('vbfeta', 1)]) >= 1.5) ]))\n",
- "print('tjtjt',len(df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[(\"vbfNumMatchedGen\",0)] == 2)& (df[('nGoodJets', 0)] == 0) & (np.abs(df[('vbfeta', 0)]) >= 1.5) & (np.abs(df[('vbfeta', 1)]) >= 1.5) ]))\n",
- "\n",
+ "# df[('ak8FatJetParticleNet_Th4q', 0)]\n",
+ "# df[('ak8FatJetParticleNetMD_Txbb', 0)]\n",
+ "# df[('ak8FatJetParTMD_THWWvsT', 0)]\n",
+ "# df[('GenHiggsChildren', 1)]\n",
+ "\n",
+ "# print(df_sorted_pt[('vbf_cos1_j1', 0)])\n",
+ "# print(df_sorted_pt[('vbfeta', 0)])\n",
+ "print(\n",
+ "    len(\n",
+ "        df[\n",
+ "            (df[(\"nGoodMuons\", 0)] == 0)\n",
+ "            & (df[(\"nGoodElectrons\", 0)] == 0)\n",
+ "            & (df[(\"nGoodVBFJets\", 0)] >= 2)\n",
+ "            & (df[(\"nGoodJets\", 0)] == 0)\n",
+ "        ]\n",
+ "    )\n",
+ ")\n",
+ "print(\n",
+ "    len(\n",
+ "        df[\n",
+ "            (df[(\"nGoodMuons\", 0)] == 0)\n",
+ "            & (df[(\"nGoodElectrons\", 0)] == 0)\n",
+ "            & (df[(\"vbfNumMatchedGen\", 0)] == 2)\n",
+ "            & (df[(\"nGoodJets\", 0)] == 0)\n",
+ "        ]\n",
+ "    )\n",
+ ")\n",
+ "print(len(df), np.sum(df[(\"vbfNumMatchedGen\", 0)] == 2))\n",
+ "\n",
+ "\n",
+ "print(\n",
+ "    len(\n",
+ "        df[\n",
+ "            (df[(\"nGoodMuons\", 0)] == 0)\n",
+ "            & (df[(\"nGoodElectrons\", 0)] == 0)\n",
+ "            & (df[(\"nGoodVBFJets\", 0)] >= 2)\n",
+ "            & (df[(\"nGoodJets\", 0)] == 0)\n",
+ "            & (np.abs(df[(\"vbfeta\", 0)]) >= 1.5)\n",
+ "            & (np.abs(df[(\"vbfeta\", 1)]) >= 1.5)\n",
+ "        ]\n",
+ "    )\n",
+ ")\n",
+ "print(\n",
+ "    \"tjtjt\",\n",
+ "    len(\n",
+ "        df[\n",
+ "            (df[(\"nGoodMuons\", 0)] == 0)\n",
+ "            & (df[(\"nGoodElectrons\", 0)] == 0)\n",
+ "            & (df[(\"vbfNumMatchedGen\", 0)] == 2)\n",
+ "            & (df[(\"nGoodJets\", 0)] == 0)\n",
+ "            & (np.abs(df[(\"vbfeta\", 0)]) >= 1.5)\n",
+ "            & (np.abs(df[(\"vbfeta\", 1)]) >= 1.5)\n",
+ "        ]\n",
+ "    ),\n",
+ ")\n",
 "\n",
 "\n",
 "# before: 69/294 50/294\n",
 "# None: 74/294 55/294\n",
- "# On events: 64/294 49/294 \n",
+ "# On events: 64/294 49/294\n",
 "# overall test pretty inconclusive, but it also shows that the eta requirement is pretty insignificant. 
probably best for filtering initial events since it doesn't initially show much difference\n", "\n", "# ('vbfptGen', 0), ('vbfptGen', 1), ('vbfetaGen', 0), ('vbfetaGen', 1), ('vbfphiGen', 0), ('vbfphiGen', 1), ('vbfMGen', 0), ('vbfMGen', 1)\n", - "# ('ak8FatJetEta', 0), ('ak8FatJetEta', 1), ('ak8FatJetPhi', 0), ('ak8FatJetPhi', 1), ('ak8FatJetMass', 0), ('ak8FatJetMass', 1), ('ak8FatJetPt', 0), ('ak8FatJetPt', 1)\n", - " " + "# ('ak8FatJetEta', 0), ('ak8FatJetEta', 1), ('ak8FatJetPhi', 0), ('ak8FatJetPhi', 1), ('ak8FatJetMass', 0), ('ak8FatJetMass', 1), ('ak8FatJetPt', 0), ('ak8FatJetPt', 1)" ] }, { @@ -105,26 +144,27 @@ "source": [ "# Apply lepton veto selections ($N_\\mu =0$ and $N_e=0$)\n", "print(np.shape(df))\n", - "df_em = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0)]\n", + "df_em = df[(df[(\"nGoodMuons\", 0)] == 0) & (df[(\"nGoodElectrons\", 0)] == 0)]\n", "\n", "# 2 vbf jets\n", - "#df_vbf = df[ (df[('nGoodVBFJetsUnsorted', 0)] >= 2)]\n", + "# df_vbf = df[ (df[('nGoodVBFJetsUnsorted', 0)] >= 2)]\n", "\n", "# lepton veto and 2 vbf jets\n", - "#df_unsorted = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJetsUnsorted', 0)] >= 2)& (df[('nGoodJets', 0)] == 0)]\n", - "df_sorted_pt = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJets', 0)] >= 2)& (df[('nGoodJets', 0)] == 0)]\n", - "#df_sorted_M = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJetsSortedM', 0)] >= 2)& (df[('nGoodJets', 0)] == 0)]\n", - "#df_sorted_eta = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJetsSortedeta', 0)] >= 2)& (df[('nGoodJets', 0)] == 0)]\n", - "\n", - "\n", - "\n", + "# df_unsorted = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJetsUnsorted', 0)] >= 2)& (df[('nGoodJets', 0)] == 0)]\n", + "df_sorted_pt = df[\n", + " (df[(\"nGoodMuons\", 0)] == 0)\n", + " & (df[(\"nGoodElectrons\", 0)] == 0)\n", + " & (df[(\"nGoodVBFJets\", 0)] >= 2)\n", + " & (df[(\"nGoodJets\", 0)] == 0)\n", + "]\n", + "# df_sorted_M = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJetsSortedM', 0)] >= 2)& (df[('nGoodJets', 0)] == 0)]\n", + "# df_sorted_eta = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodVBFJetsSortedeta', 0)] >= 2)& (df[('nGoodJets', 0)] == 0)]\n", "\n", "\n", "# generate all variables that are needed for the ak8 jet selections in VBF HH4b paper.\n", - "#df[('DijetDeltaPhi', 0)] = np.abs(df[('ak8FatJetPhi', 0)] - df[('ak8FatJetPhi', 1)])\n", - "#df[('DijetDeltaEta', 0)] = np.abs(df[('ak8FatJetEta', 0)] - df[('ak8FatJetEta', 1)])\n", - "#print(np.shape(df)[0],np.shape(df_unsorted)[0],np.shape(df_sorted_pt)[0],np.shape(df_sorted_M)[0],np.shape(df_sorted_eta)[0])\n", - "\n" + "# df[('DijetDeltaPhi', 0)] = np.abs(df[('ak8FatJetPhi', 0)] - df[('ak8FatJetPhi', 1)])\n", + "# df[('DijetDeltaEta', 0)] = np.abs(df[('ak8FatJetEta', 0)] - df[('ak8FatJetEta', 1)])\n", + "# print(np.shape(df)[0],np.shape(df_unsorted)[0],np.shape(df_sorted_pt)[0],np.shape(df_sorted_M)[0],np.shape(df_sorted_eta)[0])" ] }, { @@ -181,40 +221,60 @@ } ], "source": [ - "variables = ['ak8FatJetMass', 'ak8FatJetPt','ak8FatJetEta', 'DijetPt', 'DijetMass','DijetDeltaPhi','DijetDeltaEta'\n", - " ,'nGoodJets','vbfpt','vbfM']\n", - "\n", - "# Determine grid size: \n", + "variables = [\n", + " \"ak8FatJetMass\",\n", + " \"ak8FatJetPt\",\n", + " \"ak8FatJetEta\",\n", + " \"DijetPt\",\n", + " 
\"DijetMass\",\n", + " \"DijetDeltaPhi\",\n", + " \"DijetDeltaEta\",\n", + " \"nGoodJets\",\n", + " \"vbfpt\",\n", + " \"vbfM\",\n", + "]\n", + "\n", + "# Determine grid size:\n", "grid_size = (np.ceil(len(variables) / 2).astype(int), 2)\n", "\n", "binnums = 20\n", "\n", - "fig, axs = plt.subplots(*grid_size, figsize=(10, 4*grid_size[0]))\n", + "fig, axs = plt.subplots(*grid_size, figsize=(10, 4 * grid_size[0]))\n", "axs = axs.flatten() # Flatten to 1D for easier iteration\n", "\n", "# Loop over variables and their corresponding subplots\n", "for ax, var in zip(axs, variables):\n", " # Check if variable has two entries (0 and 1)\n", " if (var, 1) in df.columns:\n", - " for i, label in zip([0, 1], ['H-bb', 'H-VV']):\n", + " for i, label in zip([0, 1], [\"H-bb\", \"H-VV\"]):\n", " column = df[(var, i)]\n", " if i == 0:\n", - " binwidth = int((max(column) - min(column))/binnums) +0.1\n", - " ax.hist(column, bins=np.arange(min(column), max(column) + binwidth, binwidth), alpha=0.5, label=label)\n", + " binwidth = int((max(column) - min(column)) / binnums) + 0.1\n", + " ax.hist(\n", + " column,\n", + " bins=np.arange(min(column), max(column) + binwidth, binwidth),\n", + " alpha=0.5,\n", + " label=label,\n", + " )\n", " else: # Variable has only one entry (0)\n", " column = df[(var, 0)]\n", - " binwidth = int((max(column) - min(column))/binnums) + 0.1\n", - " ax.hist(column, bins=np.arange(min(column), max(column) + binwidth, binwidth), alpha=0.5, label=var)\n", - "\n", - " ax.set_xlabel('Value')\n", - " ax.set_ylabel('Count')\n", - " ax.set_title(f'Distribution of {var}')\n", + " binwidth = int((max(column) - min(column)) / binnums) + 0.1\n", + " ax.hist(\n", + " column,\n", + " bins=np.arange(min(column), max(column) + binwidth, binwidth),\n", + " alpha=0.5,\n", + " label=var,\n", + " )\n", + "\n", + " ax.set_xlabel(\"Value\")\n", + " ax.set_ylabel(\"Count\")\n", + " ax.set_title(f\"Distribution of {var}\")\n", " ax.legend()\n", " ax.grid(True)\n", "\n", "# Remove unused subplots\n", "if len(variables) < len(axs):\n", - " for ax in axs[len(variables):]:\n", + " for ax in axs[len(variables) :]:\n", " fig.delaxes(ax)\n", "\n", "plt.tight_layout()\n", @@ -240,20 +300,22 @@ ], "source": [ "# example plotting masses of fatjets.\n", - "ak8FatJetMass_bb = df[('vbfM', 0)] # Mass of H-bb\n", - "ak8FatJetMass_vv = df[('vbfM', 1)] # Mass of H-VV\n", + "ak8FatJetMass_bb = df[(\"vbfM\", 0)] # Mass of H-bb\n", + "ak8FatJetMass_vv = df[(\"vbfM\", 1)] # Mass of H-VV\n", "\n", "plt.figure(figsize=(10, 8))\n", "\n", "# plot histogram for H-bb\n", - "plt.hist(ak8FatJetMass_bb, bins=range(min(data), max(data) + binwidth, binwidth), alpha=0.5, label='H-bb')\n", + "plt.hist(\n", + " ak8FatJetMass_bb, bins=range(min(data), max(data) + binwidth, binwidth), alpha=0.5, label=\"H-bb\"\n", + ")\n", "\n", "# plot histogram for H-VV\n", - "plt.hist(ak8FatJetMass_vv, bins=50, alpha=0.5, label='H-VV')\n", + "plt.hist(ak8FatJetMass_vv, bins=50, alpha=0.5, label=\"H-VV\")\n", "\n", - "plt.xlabel('Jet Mass')\n", - "plt.ylabel('Count')\n", - "plt.title('Jet Mass Distribution')\n", + "plt.xlabel(\"Jet Mass\")\n", + "plt.ylabel(\"Count\")\n", + "plt.title(\"Jet Mass Distribution\")\n", "plt.legend()\n", "plt.grid(True)\n", "plt.show()" @@ -277,10 +339,10 @@ "def process_and_plot(df, sort_type, custom_variables=None):\n", " # Variables for each sorting type\n", " variables_dict = {\n", - " 'Sortedpt': ['vbfptSortedpt','vbfetaSortedpt','vbfphiSortedpt','vbfMSortedpt' ],\n", - " 'SortedM': 
['vbfptSortedM','vbfetaSortedM','vbfphiSortedM','vbfMSortedM' ],\n", - " 'Sortedeta': ['vbfptSortedeta','vbfetaSortedeta','vbfphiSortedeta','vbfMSortedeta' ],\n", - " 'Unsorted': ['nGoodVBFJetsUnsorted']\n", + " \"Sortedpt\": [\"vbfptSortedpt\", \"vbfetaSortedpt\", \"vbfphiSortedpt\", \"vbfMSortedpt\"],\n", + " \"SortedM\": [\"vbfptSortedM\", \"vbfetaSortedM\", \"vbfphiSortedM\", \"vbfMSortedM\"],\n", + " \"Sortedeta\": [\"vbfptSortedeta\", \"vbfetaSortedeta\", \"vbfphiSortedeta\", \"vbfMSortedeta\"],\n", + " \"Unsorted\": [\"nGoodVBFJetsUnsorted\"],\n", " }\n", "\n", " # Retrieve the corresponding variables\n", @@ -291,59 +353,61 @@ " variables += custom_variables\n", "\n", " # Apply mask to dataframe based on the sort_type\n", - " df = df[(df[('nGoodMuons', 0)] == 0) \n", - " & (df[('nGoodElectrons', 0)] == 0) \n", - " & (df[(f'nGoodVBFJets{sort_type}', 0)] >= 2)\n", - " & (df[('nGoodJets', 0)] == 0)]\n", + " df = df[\n", + " (df[(\"nGoodMuons\", 0)] == 0)\n", + " & (df[(\"nGoodElectrons\", 0)] == 0)\n", + " & (df[(f\"nGoodVBFJets{sort_type}\", 0)] >= 2)\n", + " & (df[(\"nGoodJets\", 0)] == 0)\n", + " ]\n", "\n", - " # Determine grid size: \n", + " # Determine grid size:\n", " grid_size = (np.ceil(len(variables) / 2).astype(int), 2)\n", "\n", - " fig, axs = plt.subplots(*grid_size, figsize=(10, 4*grid_size[0]))\n", + " fig, axs = plt.subplots(*grid_size, figsize=(10, 4 * grid_size[0]))\n", " axs = axs.flatten() # Flatten to 1D for easier iteration\n", "\n", " # Loop over variables and their corresponding subplots\n", " for ax, var in zip(axs, variables):\n", " # Determine bins based on variable type\n", - " if 'pt' in var:\n", + " if \"pt\" in var:\n", " bins = np.linspace(0, 1500, 51)\n", - " elif 'mass' in var:\n", + " elif \"mass\" in var:\n", " bins = np.linspace(0, 3000, 51)\n", - " elif 'phi' in var:\n", + " elif \"phi\" in var:\n", " bins = np.linspace(0, 5, 51)\n", " else:\n", " bins = 50 # default binning if variable type is not recognized\n", - " if 'vbfpt' in var:\n", + " if \"vbfpt\" in var:\n", " bins = np.linspace(0, 500, 51)\n", - " if 'vbfeta' in var:\n", + " if \"vbfeta\" in var:\n", " bins = np.linspace(-5, 5, 51)\n", - " if 'vbfM' in var:\n", + " if \"vbfM\" in var:\n", " bins = np.linspace(0, 100, 51)\n", - " if 'vbfphi' in var:\n", + " if \"vbfphi\" in var:\n", " bins = np.linspace(-1, 5, 51)\n", "\n", " # Check if variable has two entries (0 and 1)\n", " if (var, 1) in df.columns:\n", " for i in [0, 1]:\n", " column = df[(var, i)]\n", - " ax.hist(column, bins=bins, alpha=0.5, label=f'Jet {i+1}')\n", + " ax.hist(column, bins=bins, alpha=0.5, label=f\"Jet {i+1}\")\n", " else: # Variable has only one entry (0)\n", " column = df[(var, 0)]\n", " ax.hist(column, bins=bins, alpha=0.5, label=var)\n", "\n", - " ax.set_xlabel('Value')\n", - " ax.set_ylabel('Count')\n", - " ax.set_title(f'Distribution of {var}')\n", + " ax.set_xlabel(\"Value\")\n", + " ax.set_ylabel(\"Count\")\n", + " ax.set_title(f\"Distribution of {var}\")\n", " ax.legend()\n", " ax.grid(True)\n", "\n", " # Remove unused subplots\n", " if len(variables) < len(axs):\n", - " for ax in axs[len(variables):]:\n", + " for ax in axs[len(variables) :]:\n", " fig.delaxes(ax)\n", "\n", " plt.tight_layout()\n", - " plt.show()\n" + " plt.show()" ] }, { @@ -364,9 +428,9 @@ } ], "source": [ - "variables = ['ak8FatJetMass', 'ak8FatJetPt', 'DijetPt', 'DijetMass']\n", - "#df = pd.read_parquet('0--1.parquet')\n", - "process_and_plot(df, 'Sortedpt', variables)\n", + "variables = [\"ak8FatJetMass\", \"ak8FatJetPt\", 
\"DijetPt\", \"DijetMass\"]\n", + "# df = pd.read_parquet('0--1.parquet')\n", + "process_and_plot(df, \"Sortedpt\", variables)\n", "#'Unsorted': ['vbfptUnsorted','vbfetaUnsorted','vbfphiUnsorted','vbfMUnsorted','nGoodVBFJetsUnsorted']" ] }, @@ -377,13 +441,37 @@ "metadata": {}, "outputs": [], "source": [ - "def process_and_plot_all(df, num_bin=50,custom_variables=None):\n", + "def process_and_plot_all(df, num_bin=50, custom_variables=None):\n", " # Variables for each sorting type\n", " variables_dict = {\n", - " 'Unsorted': ['vbfptUnsorted','vbfetaUnsorted','vbfphiUnsorted','vbfMUnsorted','nGoodVBFJetsUnsorted'],\n", - " 'Sortedpt': ['vbfptSortedpt','vbfetaSortedpt','vbfphiSortedpt','vbfMSortedpt','nGoodVBFJetsSortedpt'],\n", - " 'SortedM': ['vbfptSortedM','vbfetaSortedM','vbfphiSortedM','vbfMSortedM','nGoodVBFJetsSortedM'],\n", - " 'Sortedeta': ['vbfptSortedeta','vbfetaSortedeta','vbfphiSortedeta','vbfMSortedeta','nGoodVBFJetsSortedeta']\n", + " \"Unsorted\": [\n", + " \"vbfptUnsorted\",\n", + " \"vbfetaUnsorted\",\n", + " \"vbfphiUnsorted\",\n", + " \"vbfMUnsorted\",\n", + " \"nGoodVBFJetsUnsorted\",\n", + " ],\n", + " \"Sortedpt\": [\n", + " \"vbfptSortedpt\",\n", + " \"vbfetaSortedpt\",\n", + " \"vbfphiSortedpt\",\n", + " \"vbfMSortedpt\",\n", + " \"nGoodVBFJetsSortedpt\",\n", + " ],\n", + " \"SortedM\": [\n", + " \"vbfptSortedM\",\n", + " \"vbfetaSortedM\",\n", + " \"vbfphiSortedM\",\n", + " \"vbfMSortedM\",\n", + " \"nGoodVBFJetsSortedM\",\n", + " ],\n", + " \"Sortedeta\": [\n", + " \"vbfptSortedeta\",\n", + " \"vbfetaSortedeta\",\n", + " \"vbfphiSortedeta\",\n", + " \"vbfMSortedeta\",\n", + " \"nGoodVBFJetsSortedeta\",\n", + " ],\n", " }\n", "\n", " # Add custom variables if provided\n", @@ -394,60 +482,69 @@ " # Determine grid size\n", " grid_size = (max(len(vars) for vars in variables_dict.values()), len(variables_dict))\n", "\n", - " fig, axs = plt.subplots(*grid_size, figsize=(10*grid_size[1], 4*grid_size[0]))\n", + " fig, axs = plt.subplots(*grid_size, figsize=(10 * grid_size[1], 4 * grid_size[0]))\n", "\n", " # Loop over sorting types and their corresponding subplot columns\n", " for sort_type, col_axs in zip(variables_dict.keys(), axs.T):\n", " # Apply mask to dataframe based on the sort_type\n", - " df_filtered = df[(df[('nGoodMuons', 0)] == 0) \n", - " & (df[('nGoodElectrons', 0)] == 0) \n", - " & (df[(f'nGoodVBFJets{sort_type}', 0)] >= 2)\n", - " & (df[('nGoodJets', 0)] == 0)]\n", + " df_filtered = df[\n", + " (df[(\"nGoodMuons\", 0)] == 0)\n", + " & (df[(\"nGoodElectrons\", 0)] == 0)\n", + " & (df[(f\"nGoodVBFJets{sort_type}\", 0)] >= 2)\n", + " & (df[(\"nGoodJets\", 0)] == 0)\n", + " ]\n", "\n", " # Loop over variables and their corresponding subplots\n", " for ax, var in zip(col_axs, variables_dict[sort_type]):\n", " # Determine bins based on variable type\n", - " if 'pt' in var:\n", - " bins = np.linspace(0, 1500, num_bin+1)\n", - " elif 'mass' in var:\n", - " bins = np.linspace(0, 3000, num_bin+1)\n", - " elif 'phi' in var:\n", - " bins = np.linspace(0, 5, num_bin+1)\n", + " if \"pt\" in var:\n", + " bins = np.linspace(0, 1500, num_bin + 1)\n", + " elif \"mass\" in var:\n", + " bins = np.linspace(0, 3000, num_bin + 1)\n", + " elif \"phi\" in var:\n", + " bins = np.linspace(0, 5, num_bin + 1)\n", " else:\n", " bins = num_bin # default binning if variable type is not recognized\n", - " \n", - " if 'vbfpt' in var:\n", - " bins = np.linspace(0, 500, num_bin+1)\n", - " if 'vbfeta' in var:\n", - " bins = np.linspace(-5, 5, num_bin+1)\n", - " if 'vbfM' in var:\n", - " bins = 
np.linspace(0, 100, num_bin+1)\n", - " if 'vbfphi' in var:\n", - " bins = np.linspace(-1, 5, num_bin+1)\n", - " \n", - " weights = df_filtered[('weight', 0)]\n", + "\n", + " if \"vbfpt\" in var:\n", + " bins = np.linspace(0, 500, num_bin + 1)\n", + " if \"vbfeta\" in var:\n", + " bins = np.linspace(-5, 5, num_bin + 1)\n", + " if \"vbfM\" in var:\n", + " bins = np.linspace(0, 100, num_bin + 1)\n", + " if \"vbfphi\" in var:\n", + " bins = np.linspace(-1, 5, num_bin + 1)\n", + "\n", + " weights = df_filtered[(\"weight\", 0)]\n", "\n", " # Check if variable has two entries (0 and 1)\n", " if (var, 1) in df_filtered.columns:\n", " for i in [0, 1]:\n", " column = df_filtered[(var, i)]\n", - " ax.hist(column, bins=bins, alpha=0.5, label=f'Jet {i+1}',density=True,weights=weights)\n", + " ax.hist(\n", + " column,\n", + " bins=bins,\n", + " alpha=0.5,\n", + " label=f\"Jet {i+1}\",\n", + " density=True,\n", + " weights=weights,\n", + " )\n", " else: # Variable has only one entry (0)\n", " column = df_filtered[(var, 0)]\n", - " ax.hist(column, bins=bins, alpha=0.5, label=var,density=True,weights=weights)\n", + " ax.hist(column, bins=bins, alpha=0.5, label=var, density=True, weights=weights)\n", "\n", - " ax.set_xlabel('Value')\n", - " ax.set_ylabel('Count')\n", - " ax.set_title(f'Distribution of {var} ({sort_type})')\n", + " ax.set_xlabel(\"Value\")\n", + " ax.set_ylabel(\"Count\")\n", + " ax.set_title(f\"Distribution of {var} ({sort_type})\")\n", " ax.legend()\n", " ax.grid(True)\n", "\n", " # Remove unused subplots\n", - " for ax in col_axs[len(variables_dict[sort_type]):]:\n", + " for ax in col_axs[len(variables_dict[sort_type]) :]:\n", " fig.delaxes(ax)\n", "\n", " plt.tight_layout()\n", - " plt.show()\n" + " plt.show()" ] }, { @@ -468,9 +565,9 @@ } ], "source": [ - "variables = ['ak8FatJetMass', 'ak8FatJetPt', 'DijetPt', 'DijetMass']\n", - "df = pd.read_parquet('0-10.parquet')\n", - "process_and_plot_all(df, num_bin=50,custom_variables=variables)\n", + "variables = [\"ak8FatJetMass\", \"ak8FatJetPt\", \"DijetPt\", \"DijetMass\"]\n", + "df = pd.read_parquet(\"0-10.parquet\")\n", + "process_and_plot_all(df, num_bin=50, custom_variables=variables)\n", "#'Unsorted': ['vbfptUnsorted','vbfetaUnsorted','vbfphiUnsorted','vbfMUnsorted','nGoodVBFJetsUnsorted']" ] }, @@ -580,21 +677,23 @@ "source": [ "def compare_filter_effects(df):\n", " # Variables for each sorting type\n", - " sort_types = ['Unsorted', 'Sortedpt', 'SortedM', 'Sortedeta']\n", + " sort_types = [\"Unsorted\", \"Sortedpt\", \"SortedM\", \"Sortedeta\"]\n", "\n", " # Different masks to apply\n", " masks = [\n", - " ('No Muons', df[('nGoodMuons', 0)] == 0),\n", - " ('No Electrons', df[('nGoodElectrons', 0)] == 0),\n", - " ('tt BG mask', df[('nGoodJets', 0)] == 0)\n", + " (\"No Muons\", df[(\"nGoodMuons\", 0)] == 0),\n", + " (\"No Electrons\", df[(\"nGoodElectrons\", 0)] == 0),\n", + " (\"tt BG mask\", df[(\"nGoodJets\", 0)] == 0),\n", " ]\n", "\n", " # Create an empty dataframe to store the results\n", - " results = pd.DataFrame(columns=['Sorting Method/Mask', 'Weighted Number of Jets'])\n", - " \n", - " # Include original DataFrame first\n", - " weighted_jet_count = df[('weight', 0)].sum()\n", - " original_row = pd.DataFrame({'Sorting Method/Mask': ['Original'], 'Weighted Number of Jets': [weighted_jet_count]})\n", + " results = pd.DataFrame(columns=[\"Sorting Method/Mask\", \"Weighted Number of Jets\"])\n", + "\n", + " # Include original DataFrame first\n", + " weighted_jet_count = df[(\"weight\", 0)].sum()\n", + " original_row = 
pd.DataFrame(\n", + " {\"Sorting Method/Mask\": [\"Original\"], \"Weighted Number of Jets\": [weighted_jet_count]}\n", + " )\n", " results = pd.concat([results, original_row], ignore_index=True)\n", "\n", " # Loop over sort types and masks\n", @@ -605,22 +704,28 @@ " df_temp = df_temp[mask] # Apply the mask\n", "\n", " # 'At least 2 VBF Jets' mask is a function, apply it separately\n", - " df_temp = df_temp[df_temp[(f'nGoodVBFJets{sort_type}', 0)] >= 2]\n", - " mask_name = 'At least 2 VBF Jets'\n", + " df_temp = df_temp[df_temp[(f\"nGoodVBFJets{sort_type}\", 0)] >= 2]\n", + " mask_name = \"At least 2 VBF Jets\"\n", "\n", " # Store the number of remaining jets along with the corresponding sorting method and mask\n", " # Instead of counting rows, sum the weights\n", - " weighted_jet_count = df_temp[('weight', 0)].sum()\n", - " new_row = pd.DataFrame({'Sorting Method/Mask': [f'{sort_type} - {mask_name}'], 'Weighted Number of Jets': [weighted_jet_count]})\n", + " weighted_jet_count = df_temp[(\"weight\", 0)].sum()\n", + " new_row = pd.DataFrame(\n", + " {\n", + " \"Sorting Method/Mask\": [f\"{sort_type} - {mask_name}\"],\n", + " \"Weighted Number of Jets\": [weighted_jet_count],\n", + " }\n", + " )\n", " results = pd.concat([results, new_row], ignore_index=True)\n", "\n", " return results\n", "\n", + "\n", "# Now you can use this function and display the result as a pandas DataFrame:\n", "results = compare_filter_effects(df)\n", "display(results)\n", "\n", - "print(np.shape(df)[0])\n" + "print(np.shape(df)[0])" ] }, { @@ -908,19 +1013,20 @@ "source": [ "import itertools\n", "\n", + "\n", "def compare_filter_effects(df):\n", " # Variables for each sorting type\n", - " sort_types = ['Unsorted', 'Sortedpt', 'SortedM', 'Sortedeta']\n", + " sort_types = [\"Unsorted\", \"Sortedpt\", \"SortedM\", \"Sortedeta\"]\n", "\n", " # Different masks to apply\n", " masks = [\n", - " ('No Muons', df[('nGoodMuons', 0)] == 0),\n", - " ('No Electrons', df[('nGoodElectrons', 0)] == 0),\n", - " ('tt BG mask', df[('nGoodJets', 0)] == 0)\n", + " (\"No Muons\", df[(\"nGoodMuons\", 0)] == 0),\n", + " (\"No Electrons\", df[(\"nGoodElectrons\", 0)] == 0),\n", + " (\"tt BG mask\", df[(\"nGoodJets\", 0)] == 0),\n", " ]\n", "\n", " # Create an empty dataframe to store the results\n", - " results = pd.DataFrame(columns=['Sorting Method/Mask', 'Number of Jets'])\n", + " results = pd.DataFrame(columns=[\"Sorting Method/Mask\", \"Number of Jets\"])\n", "\n", " # Loop over sort types\n", " for sort_type in sort_types:\n", @@ -932,25 +1038,32 @@ "\n", " # Generate the mask combination name (sort_type + mask names)\n", " mask_combination_names = [mask_name for mask_name, _ in mask_combination]\n", - " mask_name = f'{sort_type} - ' + ', '.join(mask_combination_names) if mask_combination_names else sort_type\n", - " \n", + " mask_name = (\n", + " f\"{sort_type} - \" + \", \".join(mask_combination_names)\n", + " if mask_combination_names\n", + " else sort_type\n", + " )\n", + "\n", " # Apply each mask in the combination\n", " for _, mask in mask_combination:\n", " df_temp = df_temp[mask]\n", - " \n", + "\n", " # 'At least 2 VBF Jets' mask is a function, apply it separately\n", - " df_temp = df_temp[df_temp[(f'nGoodVBFJets{sort_type}', 0)] >= 2]\n", - " mask_name += ', At least 2 VBF Jets'\n", + " df_temp = df_temp[df_temp[(f\"nGoodVBFJets{sort_type}\", 0)] >= 2]\n", + " mask_name += \", At least 2 VBF Jets\"\n", "\n", " # Store the number of remaining jets along with the corresponding sorting method and mask\n", - " new_row = 
pd.DataFrame({'Sorting Method/Mask': [mask_name], 'Number of Jets': [np.shape(df_temp)[0]]})\n", + " new_row = pd.DataFrame(\n", + " {\"Sorting Method/Mask\": [mask_name], \"Number of Jets\": [np.shape(df_temp)[0]]}\n", + " )\n", " results = pd.concat([results, new_row], ignore_index=True)\n", "\n", " return results\n", "\n", + "\n", "# Now you can use this function and display the result as a pandas DataFrame:\n", "results = compare_filter_effects(df)\n", - "display(results)\n" + "display(results)" ] }, { @@ -982,7 +1095,7 @@ } ], "source": [ - "df[('weight', 0)]" + "df[(\"weight\", 0)]" ] }, { @@ -1480,27 +1593,37 @@ } ], "source": [ - "\n", "def apply_selection(df):\n", " print(df.columns.values.tolist())\n", "\n", - " Hbb_mask = df[('ak8FatJetParticleNetMD_Txbb', 0)] > df[('ak8FatJetParticleNetMD_Txbb', 1)]\n", - " Hbb_Txbb_values = np.where(Hbb_mask, df[('ak8FatJetParticleNetMD_Txbb', 0)], df[('ak8FatJetParticleNetMD_Txbb', 1)])\n", - " HVV_Th4q_values = np.where(~Hbb_mask, df[('ak8FatJetParticleNet_Th4q', 0)], df[('ak8FatJetParticleNet_Th4q', 1)])\n", - " m = (Hbb_Txbb_values >= -0.95) & (HVV_Th4q_values >= -0.6) \n", - " m = m & (df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0) & (df[('nGoodJets', 0)] == 0) & (df[('nGoodVBFJets', 0)] >= 2)\n", - " \n", + " Hbb_mask = df[(\"ak8FatJetParticleNetMD_Txbb\", 0)] > df[(\"ak8FatJetParticleNetMD_Txbb\", 1)]\n", + " Hbb_Txbb_values = np.where(\n", + " Hbb_mask, df[(\"ak8FatJetParticleNetMD_Txbb\", 0)], df[(\"ak8FatJetParticleNetMD_Txbb\", 1)]\n", + " )\n", + " HVV_Th4q_values = np.where(\n", + " ~Hbb_mask, df[(\"ak8FatJetParticleNet_Th4q\", 0)], df[(\"ak8FatJetParticleNet_Th4q\", 1)]\n", + " )\n", + " m = (Hbb_Txbb_values >= -0.95) & (HVV_Th4q_values >= -0.6)\n", + " m = (\n", + " m\n", + " & (df[(\"nGoodMuons\", 0)] == 0)\n", + " & (df[(\"nGoodElectrons\", 0)] == 0)\n", + " & (df[(\"nGoodJets\", 0)] == 0)\n", + " & (df[(\"nGoodVBFJets\", 0)] >= 2)\n", + " )\n", + "\n", " return df[m]\n", "\n", - "df1 = pd.read_parquet('0-5signal.parquet')\n", + "\n", + "df1 = pd.read_parquet(\"0-5signal.parquet\")\n", "df1 = apply_selection(df1)\n", "\n", - "df2 = pd.read_parquet('0-5.parquet')\n", + "df2 = pd.read_parquet(\"0-5.parquet\")\n", "print(df2)\n", "df2 = apply_selection(df2)\n", "df1.columns.values.tolist()\n", - "#df2[('vbf_dR_j0_Hbb', 0)]\n", - "#df2[('vbf_dEta_jj', 0)]" + "# df2[('vbf_dR_j0_Hbb', 0)]\n", + "# df2[('vbf_dEta_jj', 0)]" ] }, { @@ -1521,29 +1644,97 @@ } ], "source": [ - "variables = [ ('ak8FatJetHbb', 0), ('ak8FatJetHbb', 1), ('ak8FatJetHVV', 0), \n", - " ('ak8FatJetHVV', 1), ('ak8FatJetHVVNumProngs', 0), ('vbfptGen', 0), ('vbfptGen', 1), ('vbfetaGen', 0), ('vbfetaGen', 1), ('vbfphiGen', 0),\n", - " ('vbfphiGen', 1), ('vbfMGen', 0), ('vbfMGen', 1), ('vbfpt', 0), ('vbfpt', 1), ('vbfeta', 0), ('vbfeta', 1), ('vbfphi', 0), ('vbfphi', 1), \n", - " ('vbfM', 0), ('vbfM', 1), ('nGoodVBFJets', 0), ('ak8FatJetEta', 0), ('ak8FatJetEta', 1), ('ak8FatJetPhi', 0), ('ak8FatJetPhi', 1), \n", - " ('ak8FatJetMass', 0), ('ak8FatJetMass', 1), ('ak8FatJetPt', 0), ('ak8FatJetPt', 1), ('ak8FatJetMsd', 0), ('ak8FatJetMsd', 1), \n", - " ]\n", - "'''not sure how to access 'Regressed mass of H → VV jet' maybe also in inf3erence. not sure if ('MET_pt', 0) is MET that we want. 
Also ('ak8FatJetPt', 0),\n", + "variables = [\n", + " (\"ak8FatJetHbb\", 0),\n", + " (\"ak8FatJetHbb\", 1),\n", + " (\"ak8FatJetHVV\", 0),\n", + " (\"ak8FatJetHVV\", 1),\n", + " (\"ak8FatJetHVVNumProngs\", 0),\n", + " (\"vbfptGen\", 0),\n", + " (\"vbfptGen\", 1),\n", + " (\"vbfetaGen\", 0),\n", + " (\"vbfetaGen\", 1),\n", + " (\"vbfphiGen\", 0),\n", + " (\"vbfphiGen\", 1),\n", + " (\"vbfMGen\", 0),\n", + " (\"vbfMGen\", 1),\n", + " (\"vbfpt\", 0),\n", + " (\"vbfpt\", 1),\n", + " (\"vbfeta\", 0),\n", + " (\"vbfeta\", 1),\n", + " (\"vbfphi\", 0),\n", + " (\"vbfphi\", 1),\n", + " (\"vbfM\", 0),\n", + " (\"vbfM\", 1),\n", + " (\"nGoodVBFJets\", 0),\n", + " (\"ak8FatJetEta\", 0),\n", + " (\"ak8FatJetEta\", 1),\n", + " (\"ak8FatJetPhi\", 0),\n", + " (\"ak8FatJetPhi\", 1),\n", + " (\"ak8FatJetMass\", 0),\n", + " (\"ak8FatJetMass\", 1),\n", + " (\"ak8FatJetPt\", 0),\n", + " (\"ak8FatJetPt\", 1),\n", + " (\"ak8FatJetMsd\", 0),\n", + " (\"ak8FatJetMsd\", 1),\n", + "]\n", + "\"\"\"not sure how to access 'Regressed mass of H → VV jet' maybe also in inf3erence. not sure if ('MET_pt', 0) is MET that we want. Also ('ak8FatJetPt', 0),\n", " ('ak8FatJetPt', 1) is just shuffling the pts instead of actually being for Hbb and HVV. same for eta.\n", " not sure what ('ak8FatJetHbb', 0), is\n", - " '''\n", - "\n", - "\n", - "labels = ['Mass of dijet system','H → VV jet pT over dijet system pT','pT of dijet system ',\n", - " 'pT of H → bb jet','pT of H → VV jet','H → VV jet pT over H → bb jet pT', \n", - " 'Missing transverse energy in event','H → bb jet pT over dijet system pT', 'Eta of H → bb jet', 'Eta of H → VV jet',\n", - " 'Eta of dijet system', 'Leading VBF-jet transverse momentum', 'Subleading VBF-jet transverse momentum', '∆R distance between VBF-jet pair'\n", - " 'VBF-jet pair mass','psuedo-rapidity separation of VBF-jet pair', '∆R distance between the two Higgs bosons','∆R distance between the HVV and Leading VBF-jet',\n", - " '∆R distance between the HVV and Subleading VBF-jet','∆R distance between the Hbb and Leading VBF-jet',\n", - " '∆R distance between the Hbb and Subleading VBF-jet', 'Leading VBF-jet cos(θ) in the HH+2j center of mass frame', 'Subleading VBF-jet cos(θ) in the HH+2j center of mass frame', ' H1-centrality * H2-centrality']\n", - "variables = [('DijetMass', 0), ('VVFatJetPtOverDijetPt', 0),('DijetPt', 0),('ak8FatJetPt', 0),\n", - " ('ak8FatJetPt', 1),('VVFatJetPtOverbbFatJetPt', 0),('MET_pt', 0),('bbFatJetPtOverDijetPt', 0),('ak8FatJetEta', 0),('ak8FatJetEta', 1),('DijetEta', 0),\n", - " ('vbfpt', 0),('vbfpt', 1),('vbf_dR_jj', 0),('vbf_Mass_jj', 0),('vbf_dEta_jj', 0), ('vbf_dR_HH', 0), ('vbf_dR_j0_HVV', 0),('vbf_dR_j1_HVV', 0),\n", - " ('vbf_dR_j0_Hbb', 0),('vbf_dR_j1_Hbb', 0),('vbf_cos1_j1', 0),('vbf_cos1_j2', 0) ,('vbf_prod_centrality', 0)]\n", + " \"\"\"\n", + "\n", + "\n", + "labels = [\n", + " \"Mass of dijet system\",\n", + " \"H → VV jet pT over dijet system pT\",\n", + " \"pT of dijet system \",\n", + " \"pT of H → bb jet\",\n", + " \"pT of H → VV jet\",\n", + " \"H → VV jet pT over H → bb jet pT\",\n", + " \"Missing transverse energy in event\",\n", + " \"H → bb jet pT over dijet system pT\",\n", + " \"Eta of H → bb jet\",\n", + " \"Eta of H → VV jet\",\n", + " \"Eta of dijet system\",\n", + " \"Leading VBF-jet transverse momentum\",\n", + " \"Subleading VBF-jet transverse momentum\",\n", + " \"∆R distance between VBF-jet pair\" \"VBF-jet pair mass\",\n", + " \"psuedo-rapidity separation of VBF-jet pair\",\n", + " \"∆R distance between the two Higgs bosons\",\n", + " \"∆R 
distance between the HVV and Leading VBF-jet\",\n", + " \"∆R distance between the HVV and Subleading VBF-jet\",\n", + " \"∆R distance between the Hbb and Leading VBF-jet\",\n", + " \"∆R distance between the Hbb and Subleading VBF-jet\",\n", + " \"Leading VBF-jet cos(θ) in the HH+2j center of mass frame\",\n", + " \"Subleading VBF-jet cos(θ) in the HH+2j center of mass frame\",\n", + " \" H1-centrality * H2-centrality\",\n", + "]\n", + "variables = [\n", + " (\"DijetMass\", 0),\n", + " (\"VVFatJetPtOverDijetPt\", 0),\n", + " (\"DijetPt\", 0),\n", + " (\"ak8FatJetPt\", 0),\n", + " (\"ak8FatJetPt\", 1),\n", + " (\"VVFatJetPtOverbbFatJetPt\", 0),\n", + " (\"MET_pt\", 0),\n", + " (\"bbFatJetPtOverDijetPt\", 0),\n", + " (\"ak8FatJetEta\", 0),\n", + " (\"ak8FatJetEta\", 1),\n", + " (\"DijetEta\", 0),\n", + " (\"vbfpt\", 0),\n", + " (\"vbfpt\", 1),\n", + " (\"vbf_dR_jj\", 0),\n", + " (\"vbf_Mass_jj\", 0),\n", + " (\"vbf_dEta_jj\", 0),\n", + " (\"vbf_dR_HH\", 0),\n", + " (\"vbf_dR_j0_HVV\", 0),\n", + " (\"vbf_dR_j1_HVV\", 0),\n", + " (\"vbf_dR_j0_Hbb\", 0),\n", + " (\"vbf_dR_j1_Hbb\", 0),\n", + " (\"vbf_cos1_j1\", 0),\n", + " (\"vbf_cos1_j2\", 0),\n", + " (\"vbf_prod_centrality\", 0),\n", + "]\n", "\n", "\n", "len(labels)\n", @@ -1605,23 +1796,22 @@ "\n", "for ax, var, label in zip(flat_axes, variables, labels):\n", " # Select the appropriate column data and index\n", - " data1 = df1[var] \n", - " data2 = df2[var] \n", + " data1 = df1[var]\n", + " data2 = df2[var]\n", "\n", " # Plot density histograms\n", - " ax.hist(data1, bins=30, alpha=0.5, label='Signal (df1)', density=True)\n", - " ax.hist(data2, bins=30, alpha=0.5, label='Background (df2)', density=True)\n", + " ax.hist(data1, bins=30, alpha=0.5, label=\"Signal (df1)\", density=True)\n", + " ax.hist(data2, bins=30, alpha=0.5, label=\"Background (df2)\", density=True)\n", " ax.set_title(label)\n", " ax.legend()\n", "\n", "# Remove unused subplots\n", - "for ax in flat_axes[len(variables):]:\n", - " ax.axis('off')\n", + "for ax in flat_axes[len(variables) :]:\n", + " ax.axis(\"off\")\n", "\n", "# Adjust the layout\n", "plt.tight_layout()\n", - "plt.show()\n", - "\n" + "plt.show()" ] } ], diff --git a/src/HHbbVV/postprocessing/PostProcessResNew.ipynb b/src/HHbbVV/postprocessing/PostProcessResNew.ipynb index 429f89f9..4d6ce5c8 100644 --- a/src/HHbbVV/postprocessing/PostProcessResNew.ipynb +++ b/src/HHbbVV/postprocessing/PostProcessResNew.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 9, + "execution_count": 116, "metadata": {}, "outputs": [], "source": [ @@ -25,6 +25,7 @@ " txbb_wps,\n", " jec_shifts,\n", " jmsr_shifts,\n", + " LUMI,\n", ")\n", "from postprocessing import res_shape_vars, new_filters, old_filters\n", "\n", @@ -33,6 +34,7 @@ "import numpy as np\n", "import pandas as pd\n", "import pickle, json\n", + "import hist\n", "from hist import Hist\n", "\n", "import os\n", @@ -43,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -53,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -69,57 +71,38 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80',\n", - " 'NMSSM_XToYHTo2W2BTo4Q2B_MX-1200_MY-190',\n", - " 'NMSSM_XToYHTo2W2BTo4Q2B_MX-2000_MY-125',\n", - " 'NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250',\n", - " 
'NMSSM_XToYHTo2W2BTo4Q2B_MX-4000_MY-150']" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "list(res_samples.values())" - ] - }, - { - "cell_type": "code", - "execution_count": 11, + "execution_count": 117, "metadata": {}, "outputs": [], "source": [ "# del nonres_samples[\"VBFHHbbVV\"]\n", - "keep_nonres_keys = [\"HHbbVV\", \"VBFHHbbVV\"]\n", - "nonres_samples = {key: nonres_samples[key] for key in keep_nonres_keys}" + "nonres_sig_keys = [\"HHbbVV\", \"VBFHHbbVV\"]\n", + "nonres_samples = {key: nonres_samples[key] for key in nonres_sig_keys}\n", + "\n", + "bg_keys = [\"QCD\", \"TT\", \"ST\", \"V+Jets\", \"Diboson\"]\n", + "samples = {key: samples[key] for key in [\"Data\"] + bg_keys}" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "MAIN_DIR = \"../../../\"\n", - "# samples_dir = f\"{MAIN_DIR}/../data/skimmer/Feb24\"\n", - "# signal_samples_dir = f\"{MAIN_DIR}/../data/skimmer/Mar10_2\"\n", - "samples_dir = \"/eos/uscms/store/user/rkansal/bbVV/skimmer/Feb24\"\n", - "nonres_signal_samples_dir = \"/eos/uscms/store/user/cmantill/bbVV/skimmer/Jun10/\"\n", - "res_signal_samples_dir = \"/eos/uscms/store/user/rkansal/bbVV/skimmer/Apr11/\"\n", - "year = \"2017\"\n", - "\n", - "date = \"23Aug22\"\n", + "samples_dir = f\"{MAIN_DIR}/../data/skimmer/Feb24\"\n", + "nonres_signal_samples_dir = f\"{MAIN_DIR}/../data/skimmer/Jun10\"\n", + "res_signal_samples_dir = f\"{MAIN_DIR}/../data/skimmer/23Aug22_5xhy\"\n", + "# samples_dir = \"/eos/uscms/store/user/rkansal/bbVV/skimmer/Feb24\"\n", + "# nonres_signal_samples_dir = \"/eos/uscms/store/user/cmantill/bbVV/skimmer/Jun10/\"\n", + "# res_signal_samples_dir = \"/eos/uscms/store/user/rkansal/bbVV/skimmer/Apr11/\"\n", + "year = \"2018\"\n", + "\n", + "date = \"23Aug23\"\n", "plot_dir = f\"../../../plots/PostProcessing/{date}/\"\n", "templates_dir = f\"templates/{date}/\"\n", - "_ = os.system(f\"mkdir -p {plot_dir}/cutflows/\")\n", + "_ = os.system(f\"mkdir -p {plot_dir}/ControlPlots/{year}\")\n", + "_ = os.system(f\"mkdir -p {plot_dir}/cutflows\")\n", "_ = os.system(f\"mkdir -p {plot_dir}/templates/wshifts\")\n", "_ = os.system(f\"mkdir -p {plot_dir}/templates/jshifts\")\n", "_ = os.system(f\"mkdir -p {plot_dir}/templates/hists2d\")\n", @@ -138,47 +121,137 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 118, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Loaded GluGluToHHTobbVV_node_cHHH1 : 163665 entries\n", - "Loaded VBF_HHTobbVV_CV_1_C2V_1_C3_1 : 18048 entries\n", - "Loaded GluGluToHHTobbVV_node_cHHH1 : 147046 entries\n", - "Loaded QCD_HT1000to1500 : 95305 entries\n", - "Loaded QCD_HT1500to2000 : 93470 entries\n", - "Loaded QCD_HT2000toInf : 51615 entries\n", + "Removing 1430 events\n", + "Loaded NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80 : 144441 entries\n", + "Removing 1873 events\n", + "Loaded NMSSM_XToYHTo2W2BTo4Q2B_MX-1200_MY-190 : 210988 entries\n", + "Removing 2717 events\n", + "Loaded NMSSM_XToYHTo2W2BTo4Q2B_MX-2000_MY-125 : 292440 entries\n", + "Removing 2795 events\n", + "Loaded NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250 : 321055 entries\n", + "Removing 2673 events\n", + "Loaded NMSSM_XToYHTo2W2BTo4Q2B_MX-4000_MY-150 : 327318 entries\n", + "Removing 2014 events\n", + "Loaded GluGluToHHTobbVV_node_cHHH1 : 188941 entries\n", + "Removing 277 events\n", + "Loaded VBF_HHTobbVV_CV_1_C2V_1_C3_1 : 17539 entries\n", + "Loaded JetHT_Run2018A : 376541 entries\n" + ] + 
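*Editor's note:* the stderr block just below flags a genuine pandas/numexpr pitfall in the HEM veto: multiplying boolean Series/DataFrames with `*` forces evaluation in Python space and triggers the warning, whereas `&` is the supported elementwise operator. A minimal sketch of the same kind of veto written with `&` instead (the MultiIndex column layout mimics the skimmed parquet files, the event numbers are dummy, and the (eta, phi) window is taken from the warning message itself):

```python
import numpy as np
import pandas as pd

# dummy two-jets-per-event kinematics with MultiIndex columns, mimicking the skimmed parquets
rng = np.random.default_rng(0)
n = 1000
events = pd.DataFrame(
    {
        ("ak8FatJetEta", 0): rng.uniform(-4, 4, n),
        ("ak8FatJetEta", 1): rng.uniform(-4, 4, n),
        ("ak8FatJetPhi", 0): rng.uniform(-np.pi, np.pi, n),
        ("ak8FatJetPhi", 1): rng.uniform(-np.pi, np.pi, n),
    }
)

eta, phi = events["ak8FatJetEta"], events["ak8FatJetPhi"]

# veto events with any jet inside the HEM region, combining boolean masks with & rather than *
hem_cut = np.any(
    (eta > -3.2) & (eta < -1.3) & (phi > -1.57) & (phi < -0.87),
    axis=1,
)
print(f"Removing {hem_cut.sum()} events")
events = events[~hem_cut]
```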
},
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/raghav/Documents/CERN/hhbbww/HHbbVV/src/HHbbVV/postprocessing/utils.py:151: UserWarning: evaluating in Python space because the '*' operator is not supported by numexpr for the bool dtype, use '&' instead.\n",
+      "  hem_cut = np.any((events[\"ak8FatJetEta\"] > -3.2) * (events[\"ak8FatJetEta\"] <-1.3) * (events[\"ak8FatJetPhi\"] > -1.57) * (events[\"ak8FatJetPhi\"] <-0.87 ), axis=1)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Removing 13922 events\n",
+      "Loaded JetHT_Run2018D : 833289 entries\n",
+      "Removing 2895 events\n",
+      "Loaded JetHT_Run2018C : 171794 entries\n",
+      "Loaded JetHT_Run2018B : 175146 entries\n",
+      "Removing 0 events\n",
+      "Loaded QCD_HT300to500 : 23 entries\n",
+      "Removing 0 events\n",
      "Loaded QCD_HT200to300 : 0 entries\n",
-      "Loaded QCD_HT300to500 : 15 entries\n",
-      "Loaded QCD_HT500to700 : 16447 entries\n",
-      "Loaded QCD_HT700to1000 : 148986 entries\n"
+      "Removing 3351 events\n",
+      "Loaded QCD_HT700to1000 : 197372 entries\n",
+      "Removing 1762 events\n",
+      "Loaded QCD_HT1000to1500 : 122855 entries\n",
+      "Removing 455 events\n",
+      "Loaded QCD_HT2000toInf : 73168 entries\n",
+      "Removing 1390 events\n",
+      "Loaded QCD_HT1500to2000 : 134020 entries\n",
+      "Removing 340 events\n",
+      "Loaded QCD_HT500to700 : 18460 entries\n"
     ]
    },
    {
-     "ename": "MemoryError",
-     "evalue": "Unable to allocate 637. MiB for an array with shape (105, 795244) and data type float64",
-     "output_type": "error",
-     "traceback": [
-      "MemoryError                               Traceback (most recent call last)",
-      "PostProcessResNew.ipynb Cell 7, line 9: events_dict |= utils.load_samples(samples_dir, samples, year, old_filters)",
-      "File /uscms_data/d3/rkansal/HHbbVV/src/HHbbVV/postprocessing/utils.py:214, in load_samples: events[\"weight\"] /= n_events",
-      "... (pandas internals condensed: DataFrame.__getitem__ -> reindex -> _consolidate_inplace -> _consolidate -> _merge_blocks) ...",
-      "MemoryError: Unable to allocate 637. MiB for an array with shape (105, 795244) and data type float64"
-     ]
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/raghav/Documents/CERN/hhbbww/HHbbVV/src/HHbbVV/postprocessing/utils.py:163: UserWarning: evaluating in Python space because the '*' operator is not supported by numexpr for the bool dtype, use '&' instead.\n",
+      "  \n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Removing 7609 events\n",
+      "Loaded TTToSemiLeptonic : 557747 entries\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/raghav/Documents/CERN/hhbbww/HHbbVV/src/HHbbVV/postprocessing/utils.py:163: UserWarning: evaluating in Python space because the '*' operator is not supported by numexpr for the bool dtype, use '&' instead.\n",
+      "  \n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Removing 10387 events\n",
+      "Loaded TTToHadronic : 749409 entries\n",
+      "Removing 290 events\n",
+      "Loaded ST_tW_top_5f_inclusiveDecays : 26038 entries\n",
+      "Removing 197 events\n",
+      "Loaded ST_tW_antitop_5f_inclusiveDecays : 19168 entries\n",
+      "Removing 277 events\n",
+      "Loaded ST_s-channel_4f_leptonDecays : 19853 entries\n",
+      "Removing 643 events\n",
+      "Loaded ST_t-channel_antitop_4f_InclusiveDecays : 42693 entries\n",
+      "Removing 0 events\n",
+      "Loaded WJetsToQQ_HT-200to400 : 0 entries\n",
+      "Removing 0 events\n",
+      "Loaded ZJetsToQQ_HT-200to400 : 0 entries\n",
+      "Removing 21 events\n",
+      "Loaded ZJetsToQQ_HT-400to600 : 685 entries\n",
+      "Removing 2276 events\n",
+      "Loaded WJetsToQQ_HT-800toInf : 133078 entries\n",
+      "Removing 1559 events\n",
+      "Loaded ZJetsToQQ_HT-600to800 : 75028 entries\n",
+      "Removing 563 events\n",
+      "Loaded WJetsToQQ_HT-600to800 : 25485 entries\n",
+      "Removing 4971 events\n",
+      "Loaded ZJetsToQQ_HT-800toInf : 294481 entries\n",
+      "Removing 6 events\n",
+      "Loaded WJetsToQQ_HT-400to600 : 213 entries\n",
+      "Removing 13 events\n",
+      "Loaded WW : 583 entries\n",
+      "Removing 17 events\n",
+      "Loaded ZZ : 949 entries\n",
+      "Removing 42 events\n",
+      "Loaded WZ : 2247 entries\n",
+      "\n",
+      "Pre-selection X[900]->H(bb)Y[80](VV) yield: 21.71\n",
+      "Pre-selection X[1200]->H(bb)Y[190](VV) yield: 33.48\n",
+      "Pre-selection X[2000]->H(bb)Y[125](VV) yield: 43.99\n",
+      "Pre-selection X[3000]->H(bb)Y[250](VV) yield: 48.29\n",
+      "Pre-selection X[4000]->H(bb)Y[150](VV) yield: 49.21\n",
+      "Pre-selection HHbbVV yield: 4.17\n",
+      "Pre-selection VBFHHbbVV yield: 0.08\n",
+      "Pre-selection Data yield: 1556770.00\n",
+      "Pre-selection QCD yield: 3003204.29\n",
+      "Pre-selection TT yield: 214598.05\n",
+      "Pre-selection ST yield: 15007.66\n",
+      "Pre-selection V+Jets yield: 84306.66\n",
+      "Pre-selection Diboson yield: 1339.33\n"
+     ]
+    }
   ],
@@ -190,8 +263,8 @@
    "\n",
    "# utils.remove_empty_parquets(samples_dir, year)\n",
    "events_dict = utils.load_samples(res_signal_samples_dir, res_samples, year, new_filters)\n",
-    "events_dict |= utils.load_samples(nonres_signal_samples_dir, nonres_samples, year, old_filters)\n",
-    "events_dict |= utils.load_samples(samples_dir, samples, year, old_filters)\n",
+    "events_dict |= utils.load_samples(nonres_signal_samples_dir, nonres_samples, year, new_filters)\n",
+    "events_dict |= utils.load_samples(samples_dir, samples, year, new_filters)\n",
    "\n",
    "utils.add_to_cutflow(events_dict, \"Preselection\", \"weight\", cutflow)\n",
    "\n",
@@ -230,24 +303,6 @@
    "    print(f\"Pre-selection {sample} yield: {tot_weight:.2f}\")"
   ]
  },
- {
-  "cell_type": "code",
-  "execution_count": null,
-  "metadata": {},
-  "outputs": [],
-
"source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "bins = np.arange(-20, 61, 5)\n", - "plt.hist(events_dict[\"ST\"][\"weight\"], bins, histtype=\"step\", label=\"ST\")\n", - "plt.hist(events_dict[\"TT\"][\"weight\"], bins, histtype=\"step\", label=\"TT\")\n", - "plt.yscale(\"log\")\n", - "plt.ylabel(\"# Events\")\n", - "plt.xlabel(\"Weights\")\n", - "plt.legend()\n", - "plt.savefig(f\"{plot_dir}/sttt_weights.pdf\")" - ] - }, { "attachments": {}, "cell_type": "markdown", @@ -264,35 +319,92 @@ "source": [ "postprocessing.apply_weights(events_dict, year, cutflow)\n", "bb_masks = postprocessing.bb_VV_assignment(events_dict)\n", + "# postprocessing.derive_variables(events_dict)\n", "cutflow" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Control Plots" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "for sample, events in events_dict.items():\n", - " if \"ak8FatJetParTMD_THWWvsT\" not in events:\n", - " h4qvst = (events[\"ak8FatJetParTMD_probHWW3q\"] + events[\"ak8FatJetParTMD_probHWW4q\"]) / (\n", - " events[\"ak8FatJetParTMD_probHWW3q\"]\n", - " + events[\"ak8FatJetParTMD_probHWW4q\"]\n", - " + events[\"ak8FatJetParTMD_probQCD\"]\n", - " + events[\"ak8FatJetParTMD_probT\"]\n", - " )\n", + "samples = list(events_dict.keys())\n", + "weight_key = \"finalWeight\"\n", + "\n", + "control_plot_2d_vars = [\n", + " {\n", + " f\"{jet}FatJetPhi\": ([40, -3.5, 3.5], rf\"$\\varphi^{{{jet}}}$\"),\n", + " f\"{jet}FatJetEta\": ([40, -3, 3], rf\"$\\eta^{{{jet}}}$\"),\n", + " }\n", + " for jet in [\"bb\", \"VV\"]\n", + "]\n", + "\n", + "hists2d = []\n", + "\n", + "for vars2d in control_plot_2d_vars:\n", + " h = Hist(\n", + " hist.axis.StrCategory(samples, name=\"Sample\"),\n", + " *[hist.axis.Regular(*bins, name=var, label=label) for var, (bins, label) in vars2d.items()],\n", + " storage=hist.storage.Weight(),\n", + " )\n", + "\n", + " for sample in samples:\n", + " events = events_dict[sample]\n", "\n", - " events_dict[sample] = pd.concat(\n", - " [events, pd.concat([h4qvst], axis=1, keys=[\"ak8FatJetParTMD_THWWvsT\"])], axis=1\n", - " )" + " fill_data = {var: utils.get_feat(events, var, bb_masks[sample]) for var in vars2d}\n", + " weight = events[weight_key].values.squeeze()\n", + "\n", + " # if selection is not None:\n", + " # sel = selection[sample]\n", + " # fill_data[var] = fill_data[var][sel]\n", + " # weight = weight[sel]\n", + "\n", + " if len(weight):\n", + " h.fill(Sample=sample, **fill_data, weight=weight)\n", + "\n", + " hists2d.append(h)" ] }, { - "attachments": {}, - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "Control Plots" + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import mplhep as hep\n", + "\n", + "plot_keys = [\"Data\", \"QCD\", \"TT\", \"HHbbVV\", \"X[3000]->H(bb)Y[250](VV)\"]\n", + "\n", + "fig, axs = plt.subplots(\n", + " len(plot_keys),\n", + " 2,\n", + " figsize=(20, 8 * len(plot_keys)),\n", + " gridspec_kw={\"wspace\": 0.25, \"hspace\": 0.25},\n", + ")\n", + "\n", + "for j, key in enumerate(plot_keys):\n", + " for i in range(2):\n", + " ax = axs[j][i]\n", + " hep.hist2dplot(hists2d[i][key, ...], cmap=\"turbo\", ax=ax)\n", + " hep.cms.label(\n", + " \"Work in Progress\", data=True, lumi=round(LUMI[year] * 1e-3), year=year, ax=ax\n", + " )\n", + " ax.set_title(key, y=1.07)\n", + " ax._children[0].colorbar.set_label(\"Events\")\n", + "\n", + 
"plt.savefig(f\"{plot_dir}/ControlPlots/{year}/HEM2d.pdf\", bbox_inches=\"tight\")\n", + "plt.show()" ] }, { @@ -303,57 +415,48 @@ "source": [ "# {var: (bins, label)}\n", "control_plot_vars = {\n", - " # \"MET_pt\": ([50, 0, 300], r\"$p^{miss}_T$ (GeV)\"),\n", + " \"MET_pt\": ([40, 0, 320], r\"$p^{miss}_T$ (GeV)\"),\n", " # \"DijetEta\": ([50, -8, 8], r\"$\\eta^{jj}$\"),\n", " # \"DijetPt\": ([50, 0, 750], r\"$p_T^{jj}$ (GeV)\"),\n", - " # \"DijetMass\": (\n", - " # # list(range(800, 1400, 100)) + [1400, 1600, 2000, 3000, 4400],\n", - " # [40, 600, 4500],\n", - " # r\"$m^{jj}$ (GeV)\",\n", - " # ),\n", - " # \"bbFatJetEta\": ([50, -2.4, 2.4], r\"$\\eta^{bb}$\"),\n", - " # \"bbFatJetPt\": ([50, 300, 1500], r\"$p^{bb}_T$ (GeV)\"),\n", - " # \"bbFatJetParticleNetMass\": ([40, 52.5, 252.5], r\"$m^{bb}_{reg}$ (GeV)\"),\n", + " # \"DijetMass\": ([50, 500, 3000], r\"$m^{jj}$ (GeV)\"),\n", + " \"bbFatJetPhi\": ([40, -3.5, 3.5], r\"$\\varphi^{bb}$\"),\n", + " \"bbFatJetEta\": ([40, -3, 3], r\"$\\eta^{bb}$\"),\n", + " \"bbFatJetPt\": ([40, 300, 2300], r\"$p^{bb}_T$ (GeV)\"), # TODO: increase bin widths, x max\n", + " # \"bbFatJetParticleNetMass\": ([50, 0, 300], r\"$m^{bb}_{reg}$ (GeV)\"),\n", " # \"bbFatJetMsd\": ([50, 0, 300], r\"$m^{bb}_{msd}$ (GeV)\"),\n", - " # \"bbFatJetParticleNetMD_Txbb\": ([50, 0.8, 1], r\"$T^{bb}_{Xbb}$\"),\n", - " # \"VVFatJetEta\": ([50, -2.4, 2.4], r\"$\\eta^{VV}$\"),\n", - " # \"VVFatJetPt\": ([50, 300, 1500], r\"$p^{VV}_T$ (GeV)\"),\n", - " # \"VVFatJetParticleNetMass\": (\n", - " # # list(range(50, 110, 10)) + list(range(110, 200, 15)) + [200, 220, 250],\n", - " # [20, 50, 250],\n", - " # r\"$m^{VV}_{reg}$ (GeV)\",\n", - " # ),\n", - " # \"VVFatJetMsd\": ([40, 50, 250], r\"$m^{VV}_{msd}$ (GeV)\"),\n", + " # \"bbFatJetParticleNetMD_Txbb\": ([50, 0.8, 1], r\"$p^{bb}_{Txbb}$\"),\n", + " \"VVFatJetPhi\": ([40, -3.5, 3.5], r\"$\\varphi^{VV}$\"),\n", + " \"VVFatJetEta\": ([40, -3, 3], r\"$\\eta^{VV}$\"),\n", + " \"VVFatJetPt\": ([40, 300, 2300], r\"$p^{VV}_T$ (GeV)\"),\n", + " # \"VVFatJetParticleNetMass\": ([50, 0, 300], r\"$m^{VV}_{reg}$ (GeV)\"),\n", + " # \"VVFatJetMsd\": ([50, 0, 300], r\"$m^{VV}_{msd}$ (GeV)\"),\n", " # \"VVFatJetParticleNet_Th4q\": ([50, 0, 1], r\"Prob($H \\to 4q$) vs Prob(QCD) (Non-MD)\"),\n", " # \"VVFatJetParTMD_THWW4q\": (\n", " # [50, 0, 1],\n", " # r\"Prob($H \\to VV \\to 4q$) vs Prob(QCD) (Mass-Decorrelated)\",\n", " # ),\n", " # \"VVFatJetParTMD_probT\": ([50, 0, 1], r\"Prob(Top) (Mass-Decorrelated)\"),\n", - " # \"VVFatJetParTMD_THWWvsT\": (\n", - " # [50, 0, 1],\n", - " # r\"$T^{VV}_{HWW}$\",\n", - " # ),\n", " # \"bbFatJetPtOverDijetPt\": ([50, 0, 40], r\"$p^{bb}_T / p_T^{jj}$\"),\n", " # \"VVFatJetPtOverDijetPt\": ([50, 0, 40], r\"$p^{VV}_T / p_T^{jj}$\"),\n", " # \"VVFatJetPtOverbbFatJetPt\": ([50, 0.4, 2.0], r\"$p^{VV}_T / p^{bb}_T$\"),\n", - " \"nGoodMuons\": ([3, 0, 3], r\"# of Muons\"),\n", - " \"nGoodElectrons\": ([3, 0, 3], r\"# of Electrons\"),\n", - " \"nGoodJets\": ([5, 0, 5], r\"# of AK4 B-Jets\"),\n", + " # \"nGoodMuons\": ([3, 0, 3], r\"# of Muons\"),\n", + " # \"nGoodElectrons\": ([3, 0, 3], r\"# of Electrons\"),\n", + " # \"nGoodJets\": ([5, 0, 5], r\"# of AK4 B-Jets\"),\n", + " # \"BDTScore\": ([50, 0, 1], r\"BDT Score\"),\n", "}\n", "\n", - "# hists = postprocessing.control_plots(\n", - "# events_dict,\n", - "# bb_masks,\n", - "# nonres_sig_keys + res_sig_keys,\n", - "# control_plot_vars,\n", - "# f\"{plot_dir}/ControlPlots/{year}/\",\n", - "# year,\n", - "# sig_splits=sig_splits,\n", - "# # bg_keys=bg_keys + 
list(higgs_samples.keys()),\n", - "# bg_keys=[\"QCD\", \"TT\", \"ST\", \"V+Jets\", \"Hbb\"],\n", - "# show=True,\n", - "# )" + "hists = postprocessing.control_plots(\n", + " events_dict,\n", + " bb_masks,\n", + " nonres_sig_keys + res_sig_keys,\n", + " control_plot_vars,\n", + " f\"{plot_dir}/ControlPlots/{year}/\",\n", + " year,\n", + " bg_keys=bg_keys,\n", + " sig_scale_dict={\"HHbbVV\": 1e5, \"VBFHHbbVV\": 2e6} | {key: 2e4 for key in res_sig_keys},\n", + " # bg_keys=[\"QCD\", \"TT\", \"ST\", \"V+Jets\", \"Hbb\"],\n", + " show=True,\n", + ")" ] }, { @@ -689,7 +792,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.4" + "version": "3.10.11" }, "orig_nbformat": 4, "vscode": { diff --git a/src/HHbbVV/postprocessing/TopAnalysis.ipynb b/src/HHbbVV/postprocessing/TopAnalysis.ipynb index 29a0223a..09eaec17 100644 --- a/src/HHbbVV/postprocessing/TopAnalysis.ipynb +++ b/src/HHbbVV/postprocessing/TopAnalysis.ipynb @@ -49,7 +49,7 @@ "metadata": {}, "outputs": [], "source": [ - "plot_dir = \"../../../plots/ttsfs/23Jul3\"\n", + "plot_dir = \"../../../plots/ttsfs/23Aug24_JMEOR_plots\"\n", "\n", "import os\n", "\n", @@ -798,7 +798,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.10.11" }, "orig_nbformat": 4, "vscode": { diff --git a/src/HHbbVV/postprocessing/bash_scripts/ControlPlots.sh b/src/HHbbVV/postprocessing/bash_scripts/ControlPlots.sh new file mode 100755 index 00000000..c0be40d0 --- /dev/null +++ b/src/HHbbVV/postprocessing/bash_scripts/ControlPlots.sh @@ -0,0 +1,11 @@ +MAIN_DIR="../../.." +TAG=23Aug23_JME_OR_plots + +for year in 2016APV 2016 2017 2018 +do + python -u postprocessing.py --control-plots --year $year --resonant --HEM2D \ + --data-dir "${MAIN_DIR}/../data/skimmer/Feb24" \ + --signal-data-dirs "${MAIN_DIR}/../data/skimmer/Jun10" "${MAIN_DIR}/../data/skimmer/23Aug22_5xhy" \ + --sig-samples 'HHbbVV' 'VBFHHbbVV' 'NMSSM_XToYHTo2W2BTo4Q2B_MX-900_MY-80' 'NMSSM_XToYHTo2W2BTo4Q2B_MX-1200_MY-190' 'NMSSM_XToYHTo2W2BTo4Q2B_MX-2000_MY-125' 'NMSSM_XToYHTo2W2BTo4Q2B_MX-3000_MY-250' 'NMSSM_XToYHTo2W2BTo4Q2B_MX-4000_MY-150' \ + --plot-dir "${MAIN_DIR}/plots/PostProcessing/$TAG" +done \ No newline at end of file diff --git a/src/HHbbVV/postprocessing/plotting.py b/src/HHbbVV/postprocessing/plotting.py index 2d00167a..7f50e6c4 100644 --- a/src/HHbbVV/postprocessing/plotting.py +++ b/src/HHbbVV/postprocessing/plotting.py @@ -56,10 +56,10 @@ sig_colours = [ "#23CE6B", + "#7F2CCB", "#ffbaba", "#ff7b7b", "#ff5252", - # "#EDB458", "#a70000", "#885053", "#3C0919", @@ -85,14 +85,14 @@ def ratioHistPlot( year: str, sig_keys: List[str], bg_keys: List[str], - sig_colours: Dict[str, str] = sig_colours, + sig_colours: List[str] = sig_colours, bg_colours: Dict[str, str] = bg_colours, sig_err: Union[ArrayLike, str] = None, data_err: Union[ArrayLike, bool, None] = None, title: str = None, blind_region: list = None, name: str = "", - sig_scale_dict: Dict[str, float] = None, + sig_scale_dict: OrderedDict[str, float] = None, ylim: int = None, show: bool = True, variation: Tuple = None, @@ -129,6 +129,8 @@ def ratioHistPlot( (wshift: name of systematic e.g. 
pileup, shift: up or down, wsamples: list of samples which are affected by this) plot_data (bool): plot data """ + + # set up samples, colours and labels bg_keys = [key for key in bg_order if key in bg_keys] bg_colours = [colours[bg_colours[sample]] for sample in bg_keys] bg_labels = deepcopy(bg_keys) @@ -145,6 +147,7 @@ def ratioHistPlot( ] ) + # set up systematic variations if needed if variation is not None: wshift, shift, wsamples = variation skey = {"up": " Up", "down": " Down"}[shift] @@ -161,11 +164,15 @@ def ratioHistPlot( sig_labels[new_key] = sig_labels[sig_key] + skey del sig_scale_dict[sig_key], sig_labels[sig_key] + # set up plots fig, (ax, rax) = plt.subplots( 2, 1, figsize=(12, 14), gridspec_kw=dict(height_ratios=[3, 1], hspace=0), sharex=True ) + # plot histograms ax.set_ylabel("Events") + + # background samples hep.histplot( [hists[sample, :] for sample in bg_keys], ax=ax, @@ -175,6 +182,7 @@ def ratioHistPlot( color=bg_colours, ) + # signal samples if len(sig_scale_dict): hep.histplot( [hists[sig_key, :] * sig_scale for sig_key, sig_scale in sig_scale_dict.items()], @@ -184,6 +192,7 @@ def ratioHistPlot( color=sig_colours[: len(sig_keys)], ) + # plot signal errors if type(sig_err) == str: scolours = {"down": colours["lightred"], "up": colours["darkred"]} for skey, shift in [("Up", "up"), ("Down", "down")]: @@ -209,6 +218,7 @@ def ratioHistPlot( sig_scale, ) + # plot data if plot_data: hep.histplot( hists[data_key, :], @@ -218,6 +228,7 @@ def ratioHistPlot( label=data_key, color="black", ) + ax.legend() if ylim is not None: @@ -225,6 +236,7 @@ def ratioHistPlot( else: ax.set_ylim(0) + # plot ratio below if plot_data: bg_tot = sum([hists[sample, :] for sample in bg_keys]) yerr = ratio_uncertainty(hists[data_key, :].values(), bg_tot.values(), "poisson") @@ -373,7 +385,7 @@ def ratioLinePlot( if title is not None: ax.set_title(title, y=1.08) - hep.cms.label("Work in Progress", data=True, lumi=LUMI[year] * 1e-3, year=year, ax=ax) + hep.cms.label("Work in Progress", data=True, lumi=round(LUMI[year] * 1e-3), year=year, ax=ax) if len(name): plt.savefig(name, bbox_inches="tight") @@ -476,3 +488,29 @@ def rocCurve( plt.ylim(*ylim) hep.cms.label(data=False, rlabel="") plt.savefig(f"{plotdir}/{name}.pdf", bbox_inches="tight") + + +def plot_HEM2d(hists2d: List[Hist], plot_keys: List[str], year: str, name: str, show: bool = False): + fig, axs = plt.subplots( + len(plot_keys), + 2, + figsize=(20, 8 * len(plot_keys)), + gridspec_kw={"wspace": 0.25, "hspace": 0.25}, + ) + + for j, key in enumerate(plot_keys): + for i in range(2): + ax = axs[j][i] + hep.hist2dplot(hists2d[i][key, ...], cmap="turbo", ax=ax) + hep.cms.label( + "Work in Progress", data=True, lumi=round(LUMI[year] * 1e-3), year=year, ax=ax + ) + ax.set_title(key, y=1.07) + ax._children[0].colorbar.set_label("Events") + + plt.savefig(name, bbox_inches="tight") + + if show: + plt.show() + else: + plt.close() diff --git a/src/HHbbVV/postprocessing/postprocessing.py b/src/HHbbVV/postprocessing/postprocessing.py index 835a9f60..3dc93ce5 100644 --- a/src/HHbbVV/postprocessing/postprocessing.py +++ b/src/HHbbVV/postprocessing/postprocessing.py @@ -124,19 +124,19 @@ class Region: # {var: (bins, label)} control_plot_vars = { - "MET_pt": ([50, 0, 250], r"$p^{miss}_T$ (GeV)"), + "MET_pt": ([40, 0, 320], r"$p^{miss}_T$ (GeV)"), # "DijetEta": ([50, -8, 8], r"$\eta^{jj}$"), # "DijetPt": ([50, 0, 750], r"$p_T^{jj}$ (GeV)"), # "DijetMass": ([50, 500, 3000], r"$m^{jj}$ (GeV)"), - "bbFatJetPhi": ([50, -2.4, 2.4], r"$\varphi^{bb}$"), - 
"bbFatJetEta": ([50, -2.4, 2.4], r"$\eta^{bb}$"), - "bbFatJetPt": ([50, 300, 1300], r"$p^{bb}_T$ (GeV)"), + "bbFatJetPhi": ([40, -3.5, 3.5], r"$\varphi^{bb}$"), + "bbFatJetEta": ([40, -3, 3], r"$\eta^{bb}$"), + "bbFatJetPt": ([40, 300, 2300], r"$p^{bb}_T$ (GeV)"), # TODO: increase bin widths, x max # "bbFatJetParticleNetMass": ([50, 0, 300], r"$m^{bb}_{reg}$ (GeV)"), # "bbFatJetMsd": ([50, 0, 300], r"$m^{bb}_{msd}$ (GeV)"), # "bbFatJetParticleNetMD_Txbb": ([50, 0.8, 1], r"$p^{bb}_{Txbb}$"), - "VVFatJetPhi": ([50, -2.4, 2.4], r"$\varphi^{VV}$"), - "VVFatJetEta": ([50, -2.4, 2.4], r"$\eta^{VV}$"), - "VVFatJetPt": ([50, 300, 1300], r"$p^{VV}_T$ (GeV)"), + "VVFatJetPhi": ([40, -3.5, 3.5], r"$\varphi^{VV}$"), + "VVFatJetEta": ([40, -3, 3], r"$\eta^{VV}$"), + "VVFatJetPt": ([40, 300, 2300], r"$p^{VV}_T$ (GeV)"), # "VVFatJetParticleNetMass": ([50, 0, 300], r"$m^{VV}_{reg}$ (GeV)"), # "VVFatJetMsd": ([50, 0, 300], r"$m^{VV}_{msd}$ (GeV)"), # "VVFatJetParticleNet_Th4q": ([50, 0, 1], r"Prob($H \to 4q$) vs Prob(QCD) (Non-MD)"), @@ -257,7 +257,7 @@ def get_res_selection_regions( nonres_shape_vars = [ ShapeVar( "bbFatJetParticleNetMass", - r"$m^{bb}_{Reg}$ (GeV)", + r"$m^{bb}_\mathrm{Reg}$ (GeV)", [20, 50, 250], reg=True, blind_window=[100, 150], @@ -269,7 +269,7 @@ def get_res_selection_regions( res_shape_vars = [ ShapeVar( "VVFatJetParticleNetMass", - r"$m^{VV}_{Reg}$ (GeV)", + r"$m^{VV}_\mathrm{Reg}$ (GeV)", list(range(50, 110, 10)) + list(range(110, 200, 15)) + [200, 220, 250], reg=False, ), @@ -323,12 +323,12 @@ def main(args): bb_masks = bb_VV_assignment(events_dict) - if "finalWeight_noTrigEffs" not in events_dict[list(events_dict.keys())[0]]: - # trigger effs (if not already from processor) - apply_weights(events_dict, args.year, cutflow) - print("\nCutflow\n", cutflow) - # THWW score vs Top (if not already from processor) - derive_variables(events_dict) + # trigger effs and QCD scale (if not already from processor) + apply_weights(events_dict, args.year, cutflow) + print("\nCutflow\n", cutflow) + + # THWW score vs Top (if not already from processor) + derive_variables(events_dict) if args.plot_dir != "": cutflow.to_csv(f"{args.plot_dir}/preselection_cutflow.csv") @@ -349,6 +349,7 @@ def main(args): # Control plots if args.control_plots: print("\nMaking control plots\n") + control_plots( events_dict, bb_masks, @@ -356,6 +357,8 @@ def main(args): control_plot_vars, f"{args.plot_dir}/ControlPlots/", args.year, + bg_keys=args.bg_keys, + sig_scale_dict={"HHbbVV": 1e5, "VBFHHbbVV": 2e6} | {key: 2e4 for key in res_sig_keys}, # sig_splits=sig_splits, show=False, ) @@ -390,7 +393,7 @@ def main(args): args.bdt_preds_dir, template_dir, systs_file, - args.signal_data_dir, + args.signal_data_dirs[0], ) # Check for 0 weights - would be an issue for weight shifts @@ -424,6 +427,7 @@ def main(args): jshift=jshift, blind_pass=True if args.resonant else False, show=False, + hists_HEM2d=args.HEM2d, plot_shifts=args.plot_shifts, ) templates = {**templates, **temps} @@ -466,7 +470,7 @@ def _process_samples(args, BDT_sample_order: List[str] = None): if args.read_sig_samples: # read all signal samples in directory read_year = args.year if args.year != "all" else "2017" - read_samples = os.listdir(f"{args.signal_data_dir}/{args.year}") + read_samples = os.listdir(f"{args.signal_data_dirs[0]}/{args.year}") sig_samples = OrderedDict() for sample in read_samples: if sample.startswith("NMSSM_XToYHTo2W2BTo4Q2B_MX-"): @@ -480,6 +484,24 @@ def _process_samples(args, BDT_sample_order: List[str] = None): if sample not in 
args.sig_samples:
                del sig_samples[sig_key]
 
+    # can load in nonres samples for control plots
+    for sample in args.sig_samples:
+        if sample in nonres_samples:
+            sig_samples[sample] = nonres_samples[sample]
+
+    # re-order according to input ordering
+    tsig_samples = OrderedDict()
+    for sample in args.sig_samples:
+        if sample in sig_samples:
+            # if sample is a key, get it directly
+            tsig_samples[sample] = sig_samples[sample]
+        else:
+            # else if it is a value, have to find the corresponding key
+            key = [key for key, value in sig_samples.items() if value == sample][0]
+            tsig_samples[key] = sample
+
+    sig_samples = tsig_samples
+
     bg_samples = deepcopy(samples)
     for bg_key, sample in list(bg_samples.items()):
         if bg_key not in args.bg_keys and bg_key != data_key:
@@ -564,7 +586,10 @@ def _load_samples(args, samples, sig_samples, cutflow, filters=None):
     if filters is None:
         filters = old_filters if args.old_processor else new_filters
 
-    events_dict = utils.load_samples(args.signal_data_dir, sig_samples, args.year, filters)
+    # signal samples may be spread across multiple skimmer directories
+    events_dict = {}
+    for d in args.signal_data_dirs:
+        events_dict = {**events_dict, **utils.load_samples(d, sig_samples, args.year, filters)}
+
     if args.data_dir:
         events_dict = {
             **events_dict,
@@ -589,7 +614,6 @@ def apply_weights(
     events_dict: Dict[str, pd.DataFrame],
     year: str,
     cutflow: pd.DataFrame = None,
-    weight_key: str = "finalWeight",
 ):
     """
    Applies (1) 2D trigger scale factors, (2) QCD scale factor.
@@ -611,11 +635,13 @@
         np.nan_to_num(effs_txbb.view(flow=False), 0), np.squeeze(effs_txbb.axes.edges)
     )
 
+    weight_key = "finalWeight"
+
     for sample in events_dict:
         events = events_dict[sample]
         if sample == data_key:
             events[weight_key] = events["weight"]
-        else:
+        elif f"{weight_key}_noTrigEffs" not in events:
             fj_trigeffs = ak8TrigEffsLookup(
                 events["ak8FatJetParticleNetMD_Txbb"].values,
                 events["ak8FatJetPt"].values,
@@ -674,6 +700,10 @@ def derive_variables(events_dict: Dict[str, pd.DataFrame]):
         if "VVFatJetParTMD_THWWvsT" in events or "ak8FatJetParTMD_THWWvsT" in events:
             continue
 
+        if "ak8FatJetParTMD_probT" not in events:
+            warnings.warn(f"ParT variables missing in {sample}!")
+            continue
+
         h4qvst = (events["ak8FatJetParTMD_probHWW3q"] + events["ak8FatJetParTMD_probHWW4q"]) / (
             events["ak8FatJetParTMD_probHWW3q"]
             + events["ak8FatJetParTMD_probHWW4q"]
@@ -918,6 +948,55 @@ def lpsfs(
     utils.add_to_cutflow(events_dict, "LP SF", "finalWeight", cutflow)
 
 
+def hists_HEM2d(
+    events_dict: Dict[str, pd.DataFrame],
+    bb_masks: Dict[str, pd.DataFrame],
+    weight_key: str = "finalWeight",
+    selection: Dict[str, np.ndarray] = None,
+):
+    """Create 2D hists of FatJet phi vs eta for bb and VV jets as a check for HEM cleaning."""
+    HEM2d_vars = [
+        {
+            f"{jet}FatJetPhi": ([40, -3.5, 3.5], rf"$\varphi^{{{jet}}}$"),
+            f"{jet}FatJetEta": ([40, -3, 3], rf"$\eta^{{{jet}}}$"),
+        }
+        for jet in ["bb", "VV"]
+    ]
+
+    samples = list(events_dict.keys())
+    hists2d = []
+
+    for vars2d in HEM2d_vars:
+        h = Hist(
+            hist.axis.StrCategory(samples, name="Sample"),
+            *[
+                hist.axis.Regular(*bins, name=var, label=label)
+                for var, (bins, label) in vars2d.items()
+            ],
+            storage=hist.storage.Weight(),
+        )
+
+        for sample in samples:
+            events = events_dict[sample]
+
+            fill_data = {var: utils.get_feat(events, var, bb_masks[sample]) for var in vars2d}
+            weight = events[weight_key].values.squeeze()
+
+            if selection is not None:
+                sel = selection[sample]
+                for var in fill_data:
+                    fill_data[var] = fill_data[var][sel]
+
+                weight = weight[sel]
+
+            if len(weight):
+                h.fill(Sample=sample, **fill_data, weight=weight)
+
+        hists2d.append(h)
+
+    return hists2d
+
+
 def control_plots(
     events_dict: Dict[str, pd.DataFrame],
     bb_masks: Dict[str, pd.DataFrame],
@@ -933,6 +1012,7 @@
     selection: Dict[str, np.ndarray] = None,
     sig_scale_dict: Dict[str, float] = None,
     combine_pdf: bool = True,
+    HEM2d: bool = False,
     show: bool = False,
 ):
     """
@@ -966,6 +1046,9 @@
         events_dict, var, bins, label, bb_masks, weight_key=weight_key, selection=selection
     )
 
+    if HEM2d and year == "2018":
+        hists["HEM2d"] = hists_HEM2d(events_dict, bb_masks, weight_key, selection)
+
     with open(f"{plot_dir}/hists.pkl", "wb") as f:
         pickle.dump(hists, f)
 
@@ -995,6 +1078,14 @@
         merger_control_plots.write(f"{tplot_dir}/{cutstr}{year}_ControlPlots.pdf")
         merger_control_plots.close()
 
+    if HEM2d and year == "2018":
+        # TODO: make the list of plotted samples configurable
+        name = f"{plot_dir}/HEM2d.pdf"
+        plotting.plot_HEM2d(
+            hists["HEM2d"], ["Data", "QCD", "TT", "HHbbVV", "X[3000]->H(bb)Y[250](VV)"], year, name
+        )
+
     return hists
 
 
@@ -1305,9 +1396,10 @@ def save_templates(
     )
 
     parser.add_argument(
-        "--signal-data-dir",
-        default="",
+        "--signal-data-dirs",
+        default=[],
         help="path to skimmed signal parquets, if different from other data",
+        nargs="*",
         type=str,
     )
 
@@ -1377,6 +1469,9 @@
     )
     utils.add_bool_arg(parser, "data", "include data", default=True)
+    utils.add_bool_arg(
+        parser, "HEM2d", "fatjet phi vs eta plots to check HEM cleaning", default=False
+    )
 
     utils.add_bool_arg(parser, "old-processor", "temp arg for old processed samples", default=False)
 
@@ -1416,12 +1511,12 @@
 
     args = parser.parse_args()
 
-    if args.template_dir == "":
-        print("Need to set --template-dir. Exiting.")
+    if args.templates and args.template_dir == "":
+        print("Need to set --template-dir if making templates. Exiting.")
         sys.exit()
 
-    if args.signal_data_dir == "":
-        args.signal_data_dir = args.data_dir
+    if not args.signal_data_dirs:
+        args.signal_data_dirs = [args.data_dir]
 
     if args.bdt_preds_dir == "" and not args.resonant:
         args.bdt_preds_dir = f"{args.data_dir}/inferences/"
diff --git a/src/HHbbVV/postprocessing/utils.py b/src/HHbbVV/postprocessing/utils.py
index 63e055c6..81043855 100644
--- a/src/HHbbVV/postprocessing/utils.py
+++ b/src/HHbbVV/postprocessing/utils.py
@@ -144,6 +144,32 @@ def check_selector(sample: str, selector: Union[str, List[str]]):
     return False
 
 
+def _hem_cleaning(sample, events):
+    """Veto events with a fat jet in the 2018 HEM15/16 region, for data Runs C & D and the equivalent lumi fraction of MC."""
+    hem_cut = np.any(
+        (events["ak8FatJetEta"] > -3.2)
+        & (events["ak8FatJetEta"] < -1.3)
+        & (events["ak8FatJetPhi"] > -1.57)
+        & (events["ak8FatJetPhi"] < -0.87),
+        axis=1,
+    )
+
+    if sample.startswith("JetHT") or sample.startswith("SingleMuon"):
+        # data: only Runs C and D are affected
+        if not (sample.endswith("2018C") or sample.endswith("2018D")):
+            return events
+    else:
+        # MC: randomly veto affected events with the lumi fraction of Runs C + D (63.2%)
+        hem_cut &= np.random.rand(len(events)) < 0.632
+
+    print(f"Removing {np.sum(hem_cut)} events")
+    return events[~hem_cut]
+
+
 def load_samples(
     data_dir: str,
     samples: Dict[str, str],
@@ -213,6 +239,9 @@
         else:
             events["weight"] /= n_events
 
+        if year == "2018":
+            events = _hem_cleaning(sample, events)
+
         if not_empty:
             events_dict[label].append(events)
 
diff --git a/src/HHbbVV/processors/TTScaleFactorsSkimmer.py b/src/HHbbVV/processors/TTScaleFactorsSkimmer.py
index c311f416..36ff6d50 100644
--- a/src/HHbbVV/processors/TTScaleFactorsSkimmer.py
+++ b/src/HHbbVV/processors/TTScaleFactorsSkimmer.py
@@ -91,9 +91,10 @@ class TTScaleFactorsSkimmer(ProcessorABC):
             "particleNet_H4qvsQCD": "ParticleNet_Th4q",
             "nConstituents": "nPFCands",
         },
+        "Jet": P4,
         "FatJetDerived": ["tau21", "tau32", "tau43", "tau42", "tau41"],
         "GenHiggs": P4,
-        "other": {"MET_pt": "MET_pt"},
+        "other": {"MET_pt": "MET_pt", "MET_phi": "MET_phi"},
     }
 
     muon_selection = {
@@ -135,12 +136,14 @@
 
     top_matchings = ["top_matched", "w_matched", "unmatched"]
 
-    def __init__(self, xsecs={}):
+    def __init__(self, xsecs={}, inference: bool = True):
         super(TTScaleFactorsSkimmer, self).__init__()
 
         # TODO: Check if this is correct
         self.XSECS = xsecs  # in pb
 
+        self._inference = inference
+
         # find corrections path using this file's path
         package_path = str(pathlib.Path(__file__).parent.parent.resolve())
         with gzip.open(package_path + "/corrections/corrections.pkl.gz", "rb") as filehandler:
@@ -230,6 +233,7 @@ def process(self, events: ak.Array):
         add_selection("trigger", HLT_triggered, *selection_args)
 
         # objects
+        num_ak4_jets = 2
         num_jets = 1
         muon = events.Muon
         fatjets = get_jec_jets(events, year) if not isData else events.FatJet
@@ -321,7 +325,35 @@
 
         add_selection("ak4_jet", ak.any(ak4_jet_selector, axis=1), *selection_args)
 
+        # 2018 HEM cleaning
+        # https://indico.cern.ch/event/1249623/contributions/5250491/attachments/2594272/4477699/HWW_0228_Draft.pdf
+        if year == "2018":
+            hem_cleaning = (
+                ((events.run >= 319077) & isData)  # if data, check if in Runs C or D
+                # else for MC, randomly cut based on lumi fraction of C & D
+                | ((np.random.rand(len(events)) < 0.632) & ~isData)
+            ) & (
+                ak.any(
+                    (
+                        (leading_fatjets.pt > 30.0)
+                        & (leading_fatjets.eta > -3.2)
+                        &
(leading_fatjets.eta < -1.3)
+                        & (leading_fatjets.phi > -1.57)
+                        & (leading_fatjets.phi < -0.87)
+                    ),
+                    -1,
+                )
+                | ((events.MET.phi > -1.62) & (events.MET.pt < 470.0) & (events.MET.phi < -0.62))
+            )
+
+            add_selection("hem_cleaning", ~np.array(hem_cleaning).astype(bool), *selection_args)
+
         # select vars
+        ak4JetVars = {
+            f"ak4Jet{key}": pad_val(ak4_jets[ak4_jet_selector][var], num_ak4_jets, axis=1)
+            for (var, key) in self.skim_vars["Jet"].items()
+        }
         ak8FatJetVars = {
             f"ak8FatJet{key}": pad_val(leading_fatjets[var], num_jets, axis=1)
@@ -339,7 +371,7 @@
             for (var, key) in self.skim_vars["other"].items()
         }
 
-        skimmed_events = {**skimmed_events, **ak8FatJetVars, **otherVars}
+        skimmed_events = {**skimmed_events, **ak4JetVars, **ak8FatJetVars, **otherVars}
 
         # particlenet h4q vs qcd, xbb vs qcd
 
@@ -446,23 +478,24 @@
         }
 
         # apply HWW4q tagger
-        # print("pre-inference")
-
-        # pnet_vars = runInferenceTriton(
-        #     self.tagger_resources_path,
-        #     events[selection.all(*selection.names)],
-        #     num_jets=1,
-        #     in_jet_idx=fatjet_idx[selection.all(*selection.names)],
-        #     jets=ak.flatten(leading_fatjets[selection.all(*selection.names)]),
-        #     all_outputs=False,
-        # )
-
-        # print("post-inference")
-
-        # skimmed_events = {
-        #     **skimmed_events,
-        #     **{key: value for (key, value) in pnet_vars.items()},
-        # }
+        if self._inference:
+            print("pre-inference")
+
+            pnet_vars = runInferenceTriton(
+                self.tagger_resources_path,
+                events[selection.all(*selection.names)],
+                num_jets=1,
+                in_jet_idx=fatjet_idx[selection.all(*selection.names)],
+                jets=ak.flatten(leading_fatjets[selection.all(*selection.names)]),
+                all_outputs=False,
+            )
+
+            print("post-inference")
+
+            skimmed_events = {
+                **skimmed_events,
+                **{key: value for (key, value) in pnet_vars.items()},
+            }
 
         if len(skimmed_events["weight"]):
             df = self.to_pandas(skimmed_events)
diff --git a/src/HHbbVV/scale_factors/VV_reweighting.ipynb b/src/HHbbVV/scale_factors/VV_reweighting.ipynb
index 0056984e..401e4a34 100644
--- a/src/HHbbVV/scale_factors/VV_reweighting.ipynb
+++ b/src/HHbbVV/scale_factors/VV_reweighting.ipynb
@@ -900,12 +900,7 @@
 "    return ratio_smeared_lookups, ratio_lnN_smeared_lookups, ratio_sys_up, ratio_sys_down\n",
 "\n",
 "\n",
-    "(\n",
-    "    ratio_smeared_lookups,\n",
-    "    ratio_lnN_smeared_lookups,\n",
-    "    ratio_sys_up,\n",
-    "    ratio_sys_down,\n",
-    ") = (\n",
+    "(ratio_smeared_lookups, ratio_lnN_smeared_lookups, ratio_sys_up, ratio_sys_down,) = (\n",
     "    [],\n",
     "    [],\n",
     "    [],\n",
diff --git a/src/HHbbVV/scale_factors/top_reweighting.ipynb b/src/HHbbVV/scale_factors/top_reweighting.ipynb
index 40e75e81..b8af2dc6 100644
--- a/src/HHbbVV/scale_factors/top_reweighting.ipynb
+++ b/src/HHbbVV/scale_factors/top_reweighting.ipynb
@@ -70,22 +70,13 @@
 "events = nanoevents.NanoEventsFactory.from_root(\n",
 "    # \"/eos/uscms/store/user/lpcpfnano/cmantill/v2_3/2017/HH_gen/GluGluToHHTobbVV_node_cHHH1_TuneCP5_13TeV-powheg-pythia8/GluGluToHHTobbVV_node_cHHH1/221017_221918/0000/nano_mc2017_100.root\",\n",
 "    # \"/eos/uscms/store/user/lpcpfnano/drankin/v2_2/2017/TTbar/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/TTToSemiLeptonic_ext1/211112_132937/0000/nano_mc2017_100.root\",\n",
-    "    \"../../../../data/nano_files/v2_3/2018/TTToSemiLeptonic/nano_mc2018_101.root\",\n",
-    "    # \"../../../../data/nano_files/v2_3/2018/QCD_HT700to1000/nano_mc2018_10.root\",\n",
+
\"../../../../data/nano_files/v2_3/2018/QCD_HT700to1000/nano_mc2018_10.root\",\n", " # \"root://cmsxrootd.fnal.gov///store/user/lpcpfnano/cmantill/v2_3/2018/TTbar/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8/TTToSemiLeptonic/220808_151244/0000/nano_mc2018_1-196.root\",\n", " schemaclass=nanoevents.NanoAODSchema,\n", ").events()" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "events.MET.phi" - ] - }, { "attachments": {}, "cell_type": "markdown", @@ -267,105 +258,6 @@ "min_muon_j_dr = ak.min(muon_j_dr, axis=1)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "closest_j = events.Jet[muon_j_dr == min_muon_j_dr][:, 0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "muon.dot(closest_j)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "_ = plt.hist(min_muon_jet_dr)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "electron = events.Electron" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "(met + muon).pt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist((met + muon).pt, histtype=\"step\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sel = ak.fill_none(muon_selector * met_sel, False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.sum(sel)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.mean((met + muon)[sel].pt > 100)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "electron.isTight" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "np.mean(ak.any(electron[sel].isTight, axis=1))" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/src/run_utils.py b/src/run_utils.py index 8b06417a..4bbded82 100644 --- a/src/run_utils.py +++ b/src/run_utils.py @@ -142,7 +142,7 @@ def get_processor( elif processor == "ttsfs": from HHbbVV.processors import TTScaleFactorsSkimmer - return TTScaleFactorsSkimmer(xsecs=get_xsecs()) + return TTScaleFactorsSkimmer(xsecs=get_xsecs(), inference=inference) elif processor == "xhy": from HHbbVV.processors import XHYProcessor @@ -182,12 +182,13 @@ def parse_common_args(parser): parser.add_argument("--label", default="AK15_H_VV", help="label", type=str) parser.add_argument("--njets", default=2, help="njets", type=int) - # bbVVSkimmer args - # REMEMBER TO PROPAGATE THIS TO SUBMIT TEMPLATE!! + # REMEMBER TO PROPAGATE THESE TO SUBMIT TEMPLATE!! 
+ # processor args + add_bool_arg(parser, "inference", default=True, help="run inference for ak8 jets") + # bbVVSkimmer-only args add_bool_arg(parser, "save-ak15", default=False, help="run inference for and save ak15 jets") add_bool_arg(parser, "save-systematics", default=False, help="save systematic variations") add_bool_arg(parser, "save-all", default=True, help="save all branches") - add_bool_arg(parser, "inference", default=True, help="run inference for ak8 jets") add_bool_arg( parser, "vbf-search", default=False, help="run selections for VBF production search" )
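
For reference, a minimal standalone sketch of the HEM15/16 veto logic that the changes above implement in `utils._hem_cleaning` and in the skimmer's `hem_cleaning` selection. This is dummy data, not the analysis pipeline: the array names, the event count, and the two-jets-per-event layout are illustrative stand-ins for the `ak8FatJetEta` / `ak8FatJetPhi` columns in the skimmed parquets.

```python
import numpy as np

rng = np.random.default_rng(42)

# dummy (eta, phi) for up to two AK8 jets per event
n_events = 10_000
eta = rng.uniform(-4, 4, size=(n_events, 2))
phi = rng.uniform(-np.pi, np.pi, size=(n_events, 2))
is_data = False  # toggle for the data vs MC treatment

# veto events with any fat jet in the HEM15/16 region:
# -3.2 < eta < -1.3 and -1.57 < phi < -0.87
in_hem = np.any((eta > -3.2) & (eta < -1.3) & (phi > -1.57) & (phi < -0.87), axis=1)

if not is_data:
    # MC: veto only a random ~63.2% of affected events, matching the
    # luminosity fraction of the 2018 Runs C + D where HEM15/16 was off
    in_hem &= rng.random(n_events) < 0.632

print(f"Removing {in_hem.sum()} / {n_events} events")
# events = events[~in_hem]  # as applied to the dataframe in utils.load_samples
```

For data only Runs C and D are cut directly (via the run number in the skimmer, or the dataset-name suffix in post-processing), which is why the MC side uses the random lumi-fraction veto instead.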
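The `hists_HEM2d` / `plot_HEM2d` additions build 2D phi-vs-eta histograms per sample to verify the cleaning visually. A minimal sketch of the same `hist` pattern, assuming only the `hist` package and dummy fill values (the sample names and axis binning mirror the code above, everything else is made up):

```python
import numpy as np
import hist
from hist import Hist

rng = np.random.default_rng(0)

# dummy samples; in hists_HEM2d these are the keys of events_dict
samples = ["Data", "QCD"]

# one StrCategory axis for the sample, plus regular phi and eta axes
h = Hist(
    hist.axis.StrCategory(samples, name="Sample"),
    hist.axis.Regular(40, -3.5, 3.5, name="bbFatJetPhi", label=r"$\varphi^{bb}$"),
    hist.axis.Regular(40, -3, 3, name="bbFatJetEta", label=r"$\eta^{bb}$"),
    storage=hist.storage.Weight(),
)

for sample in samples:
    n = 1_000
    h.fill(
        Sample=sample,
        bbFatJetPhi=rng.uniform(-np.pi, np.pi, n),
        bbFatJetEta=rng.uniform(-2.5, 2.5, n),
        weight=np.ones(n),
    )

# slice out one sample's 2D histogram, as plot_HEM2d does with hists2d[i][key, ...]
print(h["QCD", ...])
```

A jet cleaned away by the HEM veto shows up as a depleted rectangle at negative eta / phi in the 2018 data panels, which is what these check plots are for.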
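Finally, the `derive_variables` guard added above protects the THWWvsT derivation when the ParT columns are missing. The discriminant itself, as defined in the code, on dummy probabilities (column names shortened from `ak8FatJetParTMD_prob*`; values are random, for illustration only):

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(1)

# dummy ParT output probabilities for five jets
probs = pd.DataFrame(
    {key: rng.uniform(0, 1, 5) for key in ["probHWW3q", "probHWW4q", "probQCD", "probT"]}
)

# THWWvsT: H->VV (3q + 4q) scores normalized against the QCD and top scores
thww_vs_t = (probs["probHWW3q"] + probs["probHWW4q"]) / (
    probs["probHWW3q"] + probs["probHWW4q"] + probs["probQCD"] + probs["probT"]
)
print(thww_vs_t)
```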