diff --git a/VBFgenInfoTests.ipynb b/VBFgenInfoTests.ipynb index d226c2c6..90cb3778 100644 --- a/VBFgenInfoTests.ipynb +++ b/VBFgenInfoTests.ipynb @@ -428,16 +428,11 @@ } ], "source": [ -<<<<<<< HEAD "df = pd.read_parquet('0-2.parquet')\n", "\n", "df.columns.values.tolist()\n", "df\n", " " -======= - "df = pd.read_parquet(\"0-10.parquet\")\n", - "df.columns.values.tolist()" ->>>>>>> 219b36b8254c529b99a31b88fb9609023c060267 ] }, { @@ -473,10 +468,9 @@ "source": [ "# Apply lepton veto selections ($N_\\mu =0$ and $N_e=0$)\n", "print(np.shape(df))\n", - "df_em = df[(df[(\"nGoodMuons\", 0)] == 0) & (df[(\"nGoodElectrons\", 0)] == 0)]\n", + "df_em = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0)]\n", "\n", "# 2 vbf jets\n", -<<<<<<< HEAD "#df_vbf = df[ (df[('nGoodVBFJetsUnsorted', 0)] >= 2)]\n", "\n", "# lepton veto and 2 vbf jets\n", @@ -495,47 +489,6 @@ "print('tsts',np.sum(df_sorted_pt['weight']),np.sum(df_sorted_pt['weight_noxsec']))\n", "df_sorted_pt\n", "print(np.sum(df_sorted_pt['weight_noxsec']))\n" -======= - "df_vbf = df[(df[(\"nGoodVBFJetsUnsorted\", 0)] >= 2)]\n", - "\n", - "# lepton veto and 2 vbf jets\n", - "df_unsorted = df[\n", - " (df[(\"nGoodMuons\", 0)] == 0)\n", - " & (df[(\"nGoodElectrons\", 0)] == 0)\n", - " & (df[(\"nGoodVBFJetsUnsorted\", 0)] >= 2)\n", - " & (df[(\"nGoodJets\", 0)] == 0)\n", - "]\n", - "df_sorted_pt = df[\n", - " (df[(\"nGoodMuons\", 0)] == 0)\n", - " & (df[(\"nGoodElectrons\", 0)] == 0)\n", - " & (df[(\"nGoodVBFJetsSortedpt\", 0)] >= 2)\n", - " & (df[(\"nGoodJets\", 0)] == 0)\n", - "]\n", - "df_sorted_M = df[\n", - " (df[(\"nGoodMuons\", 0)] == 0)\n", - " & (df[(\"nGoodElectrons\", 0)] == 0)\n", - " & (df[(\"nGoodVBFJetsSortedM\", 0)] >= 2)\n", - " & (df[(\"nGoodJets\", 0)] == 0)\n", - "]\n", - "df_sorted_eta = df[\n", - " (df[(\"nGoodMuons\", 0)] == 0)\n", - " & (df[(\"nGoodElectrons\", 0)] == 0)\n", - " & (df[(\"nGoodVBFJetsSortedeta\", 0)] >= 2)\n", - " & (df[(\"nGoodJets\", 0)] == 0)\n", - "]\n", - "\n", - "\n", - "# generate all variables that are needed for the ak8 jet selections in VBF HH4b paper.\n", - "# df[('DijetDeltaPhi', 0)] = np.abs(df[('ak8FatJetPhi', 0)] - df[('ak8FatJetPhi', 1)])\n", - "# df[('DijetDeltaEta', 0)] = np.abs(df[('ak8FatJetEta', 0)] - df[('ak8FatJetEta', 1)])\n", - "print(\n", - " np.shape(df)[0],\n", - " np.shape(df_unsorted)[0],\n", - " np.shape(df_sorted_pt)[0],\n", - " np.shape(df_sorted_M)[0],\n", - " np.shape(df_sorted_eta)[0],\n", - ")" ->>>>>>> 219b36b8254c529b99a31b88fb9609023c060267 ] }, { @@ -690,7 +643,6 @@ " dphi = np.where(dphi < np.pi, dphi, 2 * np.pi - dphi)\n", " return np.sqrt(deta**2 + dphi**2)\n", "\n", -<<<<<<< HEAD "# Calculate delta R for each sorting method and add it to the DataFrame\n", "for df, method in zip([df_sorted_Rand, df_sorted_pt, df_sorted_M, df_sorted_eta], ['Rand', 'pt', 'M', 'eta']):\n", " df[\"deltaR_1_to_0\"] = deltaR(df[('vbfetaGen', 0)], df[('vbfphiGen', 0)],\n", @@ -3167,31 +3119,6 @@ " plt.title(\"Combined drj1VV and drj2VV values\")\n", " plt.xlabel(\"Delta R\")\n", " plt.legend()\n", -======= - "\n", - "# Create a dictionary mapping sorting methods to their dataframes\n", - "sorting_methods_to_dfs = {\n", - " \"Rand\": df_unsorted,\n", - " \"pt\": df_sorted_pt,\n", - " \"M\": df_sorted_M,\n", - " \"eta\": df_sorted_eta,\n", - "}\n", - "\n", - "# Iterate over each DataFrame and modify the column names\n", - "for df_name, df_method in sorting_methods_to_dfs.items():\n", - " df_method.rename(\n", - " columns={(\"vbfphiUnsortedRand\", jet): (\"vbfphiSortedRand\", jet) for jet in [0, 1]},\n", - " inplace=True,\n", - " )\n", - " df_method.rename(\n", - " columns={(\"vbfMUnsortedRand\", jet): (\"vbfMSortedRand\", jet) for jet in [0, 1]}, inplace=True\n", - " )\n", - "\n", - "for df_name, df_method in sorting_methods_to_dfs.items():\n", - " print(f\"{df_name} DataFrame:\\n\")\n", - " print(df_method.columns.values.tolist())\n", - " print(\"\\n---\\n\")\n", ->>>>>>> 219b36b8254c529b99a31b88fb9609023c060267 "\n", " # All variables combined\n", " plt.subplot(2, 1, 2)\n", @@ -3203,31 +3130,12 @@ " plt.ylabel(\"Number of Events\")\n", " plt.legend()\n", "\n", -<<<<<<< HEAD " plt.tight_layout()\n", " plt.show()\n", "\n", "# The function call remains the same:\n", "process_and_plot_refactored('output_deltaRtruthinfo.txt')\n", "process_and_plot_refactored('output_deltaRtruthinfo_bbtagging.txt')" -======= - "# Iterate over each sorting method and calculate delta R\n", - "for method, df_method in sorting_methods_to_dfs.items():\n", - " for jet in [0, 1]:\n", - " eta_gen = df_method[(f\"vbfetaGen\", jet)]\n", - " phi_gen = df_method[(f\"vbfphiGen\", jet)]\n", - "\n", - " eta_sorted = df_method[(f\"vbfetaSorted{method}\", jet)]\n", - " phi_sorted = df_method[(f\"vbfphiSorted{method}\", jet)]\n", - "\n", - " df_method[(f\"deltaR_{method}\", jet)] = deltaR(eta_gen, phi_gen, eta_sorted, phi_sorted)\n", - "\n", - "# Print DataFrames to check results\n", - "for df_name, df_method in sorting_methods_to_dfs.items():\n", - " print(f\"{df_name} DataFrame:\\n\")\n", - " print(df_method)\n", - " print(\"\\n---\\n\")" ->>>>>>> 219b36b8254c529b99a31b88fb9609023c060267 ] }, { diff --git a/VBFjetsVisualizePrototype.ipynb b/VBFjetsVisualizePrototype.ipynb index 36cf75cd..2d534530 100644 --- a/VBFjetsVisualizePrototype.ipynb +++ b/VBFjetsVisualizePrototype.ipynb @@ -455,7 +455,6 @@ } ], "source": [ -<<<<<<< HEAD "df = pd.read_parquet('0--1.parquet')\n", "#df = pd.read_pickle('outfiles/0-2.pkl')\n", "#print(df)\n", @@ -473,10 +472,6 @@ "# ('vbfptGen', 0), ('vbfptGen', 1), ('vbfetaGen', 0), ('vbfetaGen', 1), ('vbfphiGen', 0), ('vbfphiGen', 1), ('vbfMGen', 0), ('vbfMGen', 1)\n", "# ('ak8FatJetEta', 0), ('ak8FatJetEta', 1), ('ak8FatJetPhi', 0), ('ak8FatJetPhi', 1), ('ak8FatJetMass', 0), ('ak8FatJetMass', 1), ('ak8FatJetPt', 0), ('ak8FatJetPt', 1)\n", " " -======= - "df = pd.read_parquet(\"0--1.parquet\")\n", - "df.columns.values.tolist()" ->>>>>>> 219b36b8254c529b99a31b88fb9609023c060267 ] }, { @@ -504,10 +499,9 @@ "source": [ "# Apply lepton veto selections ($N_\\mu =0$ and $N_e=0$)\n", "print(np.shape(df))\n", - "df_em = df[(df[(\"nGoodMuons\", 0)] == 0) & (df[(\"nGoodElectrons\", 0)] == 0)]\n", + "df_em = df[(df[('nGoodMuons', 0)] == 0) & (df[('nGoodElectrons', 0)] == 0)]\n", "\n", "# 2 vbf jets\n", -<<<<<<< HEAD "#df_vbf = df[ (df[('nGoodVBFJetsUnsorted', 0)] >= 2)]\n", "\n", "# lepton veto and 2 vbf jets\n", @@ -525,47 +519,6 @@ "#df[('DijetDeltaEta', 0)] = np.abs(df[('ak8FatJetEta', 0)] - df[('ak8FatJetEta', 1)])\n", "#print(np.shape(df)[0],np.shape(df_unsorted)[0],np.shape(df_sorted_pt)[0],np.shape(df_sorted_M)[0],np.shape(df_sorted_eta)[0])\n", "\n" -======= - "df_vbf = df[(df[(\"nGoodVBFJetsUnsorted\", 0)] >= 2)]\n", - "\n", - "# lepton veto and 2 vbf jets\n", - "df_unsorted = df[\n", - " (df[(\"nGoodMuons\", 0)] == 0)\n", - " & (df[(\"nGoodElectrons\", 0)] == 0)\n", - " & (df[(\"nGoodVBFJetsUnsorted\", 0)] >= 2)\n", - " & (df[(\"nGoodJets\", 0)] == 0)\n", - "]\n", - "df_sorted_pt = df[\n", - " (df[(\"nGoodMuons\", 0)] == 0)\n", - " & (df[(\"nGoodElectrons\", 0)] == 0)\n", - " & (df[(\"nGoodVBFJetsSortedpt\", 0)] >= 2)\n", - " & (df[(\"nGoodJets\", 0)] == 0)\n", - "]\n", - "df_sorted_M = df[\n", - " (df[(\"nGoodMuons\", 0)] == 0)\n", - " & (df[(\"nGoodElectrons\", 0)] == 0)\n", - " & (df[(\"nGoodVBFJetsSortedM\", 0)] >= 2)\n", - " & (df[(\"nGoodJets\", 0)] == 0)\n", - "]\n", - "df_sorted_eta = df[\n", - " (df[(\"nGoodMuons\", 0)] == 0)\n", - " & (df[(\"nGoodElectrons\", 0)] == 0)\n", - " & (df[(\"nGoodVBFJetsSortedeta\", 0)] >= 2)\n", - " & (df[(\"nGoodJets\", 0)] == 0)\n", - "]\n", - "\n", - "\n", - "# generate all variables that are needed for the ak8 jet selections in VBF HH4b paper.\n", - "# df[('DijetDeltaPhi', 0)] = np.abs(df[('ak8FatJetPhi', 0)] - df[('ak8FatJetPhi', 1)])\n", - "# df[('DijetDeltaEta', 0)] = np.abs(df[('ak8FatJetEta', 0)] - df[('ak8FatJetEta', 1)])\n", - "print(\n", - " np.shape(df)[0],\n", - " np.shape(df_unsorted)[0],\n", - " np.shape(df_sorted_pt)[0],\n", - " np.shape(df_sorted_M)[0],\n", - " np.shape(df_sorted_eta)[0],\n", - ")" ->>>>>>> 219b36b8254c529b99a31b88fb9609023c060267 ] }, { @@ -622,64 +575,40 @@ } ], "source": [ -<<<<<<< HEAD "variables = ['ak8FatJetMass', 'ak8FatJetPt','ak8FatJetEta', 'DijetPt', 'DijetMass','DijetDeltaPhi','DijetDeltaEta'\n", " ,'nGoodJets','vbfpt','vbfM']\n", -======= - "variables = [\n", - " \"ak8FatJetMass\",\n", - " \"ak8FatJetPt\",\n", - " \"DijetPt\",\n", - " \"DijetMass\",\n", - " \"DijetDeltaPhi\",\n", - " \"DijetDeltaEta\",\n", - " \"nGoodJets\",\n", - " \"vbfpt\",\n", - " \"vbfM\",\n", - "]\n", ->>>>>>> 219b36b8254c529b99a31b88fb9609023c060267 - "\n", - "# Determine grid size:\n", + "\n", + "# Determine grid size: \n", "grid_size = (np.ceil(len(variables) / 2).astype(int), 2)\n", "\n", "binnums = 20\n", "\n", - "fig, axs = plt.subplots(*grid_size, figsize=(10, 4 * grid_size[0]))\n", + "fig, axs = plt.subplots(*grid_size, figsize=(10, 4*grid_size[0]))\n", "axs = axs.flatten() # Flatten to 1D for easier iteration\n", "\n", "# Loop over variables and their corresponding subplots\n", "for ax, var in zip(axs, variables):\n", " # Check if variable has two entries (0 and 1)\n", " if (var, 1) in df.columns:\n", - " for i, label in zip([0, 1], [\"H-bb\", \"H-VV\"]):\n", + " for i, label in zip([0, 1], ['H-bb', 'H-VV']):\n", " column = df[(var, i)]\n", " if i == 0:\n", - " binwidth = int((max(column) - min(column)) / binnums) + 0.1\n", - " ax.hist(\n", - " column,\n", - " bins=np.arange(min(column), max(column) + binwidth, binwidth),\n", - " alpha=0.5,\n", - " label=label,\n", - " )\n", + " binwidth = int((max(column) - min(column))/binnums) +0.1\n", + " ax.hist(column, bins=np.arange(min(column), max(column) + binwidth, binwidth), alpha=0.5, label=label)\n", " else: # Variable has only one entry (0)\n", " column = df[(var, 0)]\n", - " binwidth = int((max(column) - min(column)) / binnums) + 0.1\n", - " ax.hist(\n", - " column,\n", - " bins=np.arange(min(column), max(column) + binwidth, binwidth),\n", - " alpha=0.5,\n", - " label=var,\n", - " )\n", - "\n", - " ax.set_xlabel(\"Value\")\n", - " ax.set_ylabel(\"Count\")\n", - " ax.set_title(f\"Distribution of {var}\")\n", + " binwidth = int((max(column) - min(column))/binnums) + 0.1\n", + " ax.hist(column, bins=np.arange(min(column), max(column) + binwidth, binwidth), alpha=0.5, label=var)\n", + "\n", + " ax.set_xlabel('Value')\n", + " ax.set_ylabel('Count')\n", + " ax.set_title(f'Distribution of {var}')\n", " ax.legend()\n", " ax.grid(True)\n", "\n", "# Remove unused subplots\n", "if len(variables) < len(axs):\n", - " for ax in axs[len(variables) :]:\n", + " for ax in axs[len(variables):]:\n", " fig.delaxes(ax)\n", "\n", "plt.tight_layout()\n", @@ -705,22 +634,20 @@ ], "source": [ "# example plotting masses of fatjets.\n", - "ak8FatJetMass_bb = df[(\"vbfM\", 0)] # Mass of H-bb\n", - "ak8FatJetMass_vv = df[(\"vbfM\", 1)] # Mass of H-VV\n", + "ak8FatJetMass_bb = df[('vbfM', 0)] # Mass of H-bb\n", + "ak8FatJetMass_vv = df[('vbfM', 1)] # Mass of H-VV\n", "\n", "plt.figure(figsize=(10, 8))\n", "\n", "# plot histogram for H-bb\n", - "plt.hist(\n", - " ak8FatJetMass_bb, bins=range(min(data), max(data) + binwidth, binwidth), alpha=0.5, label=\"H-bb\"\n", - ")\n", + "plt.hist(ak8FatJetMass_bb, bins=range(min(data), max(data) + binwidth, binwidth), alpha=0.5, label='H-bb')\n", "\n", "# plot histogram for H-VV\n", - "plt.hist(ak8FatJetMass_vv, bins=50, alpha=0.5, label=\"H-VV\")\n", + "plt.hist(ak8FatJetMass_vv, bins=50, alpha=0.5, label='H-VV')\n", "\n", - "plt.xlabel(\"Jet Mass\")\n", - "plt.ylabel(\"Count\")\n", - "plt.title(\"Jet Mass Distribution\")\n", + "plt.xlabel('Jet Mass')\n", + "plt.ylabel('Count')\n", + "plt.title('Jet Mass Distribution')\n", "plt.legend()\n", "plt.grid(True)\n", "plt.show()" @@ -744,10 +671,10 @@ "def process_and_plot(df, sort_type, custom_variables=None):\n", " # Variables for each sorting type\n", " variables_dict = {\n", - " \"Sortedpt\": [\"vbfptSortedpt\", \"vbfetaSortedpt\", \"vbfphiSortedpt\", \"vbfMSortedpt\"],\n", - " \"SortedM\": [\"vbfptSortedM\", \"vbfetaSortedM\", \"vbfphiSortedM\", \"vbfMSortedM\"],\n", - " \"Sortedeta\": [\"vbfptSortedeta\", \"vbfetaSortedeta\", \"vbfphiSortedeta\", \"vbfMSortedeta\"],\n", - " \"Unsorted\": [\"nGoodVBFJetsUnsorted\"],\n", + " 'Sortedpt': ['vbfptSortedpt','vbfetaSortedpt','vbfphiSortedpt','vbfMSortedpt' ],\n", + " 'SortedM': ['vbfptSortedM','vbfetaSortedM','vbfphiSortedM','vbfMSortedM' ],\n", + " 'Sortedeta': ['vbfptSortedeta','vbfetaSortedeta','vbfphiSortedeta','vbfMSortedeta' ],\n", + " 'Unsorted': ['nGoodVBFJetsUnsorted']\n", " }\n", "\n", " # Retrieve the corresponding variables\n", @@ -758,61 +685,59 @@ " variables += custom_variables\n", "\n", " # Apply mask to dataframe based on the sort_type\n", - " df = df[\n", - " (df[(\"nGoodMuons\", 0)] == 0)\n", - " & (df[(\"nGoodElectrons\", 0)] == 0)\n", - " & (df[(f\"nGoodVBFJets{sort_type}\", 0)] >= 2)\n", - " & (df[(\"nGoodJets\", 0)] == 0)\n", - " ]\n", + " df = df[(df[('nGoodMuons', 0)] == 0) \n", + " & (df[('nGoodElectrons', 0)] == 0) \n", + " & (df[(f'nGoodVBFJets{sort_type}', 0)] >= 2)\n", + " & (df[('nGoodJets', 0)] == 0)]\n", "\n", - " # Determine grid size:\n", + " # Determine grid size: \n", " grid_size = (np.ceil(len(variables) / 2).astype(int), 2)\n", "\n", - " fig, axs = plt.subplots(*grid_size, figsize=(10, 4 * grid_size[0]))\n", + " fig, axs = plt.subplots(*grid_size, figsize=(10, 4*grid_size[0]))\n", " axs = axs.flatten() # Flatten to 1D for easier iteration\n", "\n", " # Loop over variables and their corresponding subplots\n", " for ax, var in zip(axs, variables):\n", " # Determine bins based on variable type\n", - " if \"pt\" in var:\n", + " if 'pt' in var:\n", " bins = np.linspace(0, 1500, 51)\n", - " elif \"mass\" in var:\n", + " elif 'mass' in var:\n", " bins = np.linspace(0, 3000, 51)\n", - " elif \"phi\" in var:\n", + " elif 'phi' in var:\n", " bins = np.linspace(0, 5, 51)\n", " else:\n", " bins = 50 # default binning if variable type is not recognized\n", - " if \"vbfpt\" in var:\n", + " if 'vbfpt' in var:\n", " bins = np.linspace(0, 500, 51)\n", - " if \"vbfeta\" in var:\n", + " if 'vbfeta' in var:\n", " bins = np.linspace(-5, 5, 51)\n", - " if \"vbfM\" in var:\n", + " if 'vbfM' in var:\n", " bins = np.linspace(0, 100, 51)\n", - " if \"vbfphi\" in var:\n", + " if 'vbfphi' in var:\n", " bins = np.linspace(-1, 5, 51)\n", "\n", " # Check if variable has two entries (0 and 1)\n", " if (var, 1) in df.columns:\n", " for i in [0, 1]:\n", " column = df[(var, i)]\n", - " ax.hist(column, bins=bins, alpha=0.5, label=f\"Jet {i+1}\")\n", + " ax.hist(column, bins=bins, alpha=0.5, label=f'Jet {i+1}')\n", " else: # Variable has only one entry (0)\n", " column = df[(var, 0)]\n", " ax.hist(column, bins=bins, alpha=0.5, label=var)\n", "\n", - " ax.set_xlabel(\"Value\")\n", - " ax.set_ylabel(\"Count\")\n", - " ax.set_title(f\"Distribution of {var}\")\n", + " ax.set_xlabel('Value')\n", + " ax.set_ylabel('Count')\n", + " ax.set_title(f'Distribution of {var}')\n", " ax.legend()\n", " ax.grid(True)\n", "\n", " # Remove unused subplots\n", " if len(variables) < len(axs):\n", - " for ax in axs[len(variables) :]:\n", + " for ax in axs[len(variables):]:\n", " fig.delaxes(ax)\n", "\n", " plt.tight_layout()\n", - " plt.show()" + " plt.show()\n" ] }, { @@ -833,9 +758,9 @@ } ], "source": [ - "variables = [\"ak8FatJetMass\", \"ak8FatJetPt\", \"DijetPt\", \"DijetMass\"]\n", - "# df = pd.read_parquet('0--1.parquet')\n", - "process_and_plot(df, \"Sortedpt\", variables)\n", + "variables = ['ak8FatJetMass', 'ak8FatJetPt', 'DijetPt', 'DijetMass']\n", + "#df = pd.read_parquet('0--1.parquet')\n", + "process_and_plot(df, 'Sortedpt', variables)\n", "#'Unsorted': ['vbfptUnsorted','vbfetaUnsorted','vbfphiUnsorted','vbfMUnsorted','nGoodVBFJetsUnsorted']" ] }, @@ -846,37 +771,13 @@ "metadata": {}, "outputs": [], "source": [ - "def process_and_plot_all(df, num_bin=50, custom_variables=None):\n", + "def process_and_plot_all(df, num_bin=50,custom_variables=None):\n", " # Variables for each sorting type\n", " variables_dict = {\n", - " \"Unsorted\": [\n", - " \"vbfptUnsorted\",\n", - " \"vbfetaUnsorted\",\n", - " \"vbfphiUnsorted\",\n", - " \"vbfMUnsorted\",\n", - " \"nGoodVBFJetsUnsorted\",\n", - " ],\n", - " \"Sortedpt\": [\n", - " \"vbfptSortedpt\",\n", - " \"vbfetaSortedpt\",\n", - " \"vbfphiSortedpt\",\n", - " \"vbfMSortedpt\",\n", - " \"nGoodVBFJetsSortedpt\",\n", - " ],\n", - " \"SortedM\": [\n", - " \"vbfptSortedM\",\n", - " \"vbfetaSortedM\",\n", - " \"vbfphiSortedM\",\n", - " \"vbfMSortedM\",\n", - " \"nGoodVBFJetsSortedM\",\n", - " ],\n", - " \"Sortedeta\": [\n", - " \"vbfptSortedeta\",\n", - " \"vbfetaSortedeta\",\n", - " \"vbfphiSortedeta\",\n", - " \"vbfMSortedeta\",\n", - " \"nGoodVBFJetsSortedeta\",\n", - " ],\n", + " 'Unsorted': ['vbfptUnsorted','vbfetaUnsorted','vbfphiUnsorted','vbfMUnsorted','nGoodVBFJetsUnsorted'],\n", + " 'Sortedpt': ['vbfptSortedpt','vbfetaSortedpt','vbfphiSortedpt','vbfMSortedpt','nGoodVBFJetsSortedpt'],\n", + " 'SortedM': ['vbfptSortedM','vbfetaSortedM','vbfphiSortedM','vbfMSortedM','nGoodVBFJetsSortedM'],\n", + " 'Sortedeta': ['vbfptSortedeta','vbfetaSortedeta','vbfphiSortedeta','vbfMSortedeta','nGoodVBFJetsSortedeta']\n", " }\n", "\n", " # Add custom variables if provided\n", @@ -887,69 +788,60 @@ " # Determine grid size\n", " grid_size = (max(len(vars) for vars in variables_dict.values()), len(variables_dict))\n", "\n", - " fig, axs = plt.subplots(*grid_size, figsize=(10 * grid_size[1], 4 * grid_size[0]))\n", + " fig, axs = plt.subplots(*grid_size, figsize=(10*grid_size[1], 4*grid_size[0]))\n", "\n", " # Loop over sorting types and their corresponding subplot columns\n", " for sort_type, col_axs in zip(variables_dict.keys(), axs.T):\n", " # Apply mask to dataframe based on the sort_type\n", - " df_filtered = df[\n", - " (df[(\"nGoodMuons\", 0)] == 0)\n", - " & (df[(\"nGoodElectrons\", 0)] == 0)\n", - " & (df[(f\"nGoodVBFJets{sort_type}\", 0)] >= 2)\n", - " & (df[(\"nGoodJets\", 0)] == 0)\n", - " ]\n", + " df_filtered = df[(df[('nGoodMuons', 0)] == 0) \n", + " & (df[('nGoodElectrons', 0)] == 0) \n", + " & (df[(f'nGoodVBFJets{sort_type}', 0)] >= 2)\n", + " & (df[('nGoodJets', 0)] == 0)]\n", "\n", " # Loop over variables and their corresponding subplots\n", " for ax, var in zip(col_axs, variables_dict[sort_type]):\n", " # Determine bins based on variable type\n", - " if \"pt\" in var:\n", - " bins = np.linspace(0, 1500, num_bin + 1)\n", - " elif \"mass\" in var:\n", - " bins = np.linspace(0, 3000, num_bin + 1)\n", - " elif \"phi\" in var:\n", - " bins = np.linspace(0, 5, num_bin + 1)\n", + " if 'pt' in var:\n", + " bins = np.linspace(0, 1500, num_bin+1)\n", + " elif 'mass' in var:\n", + " bins = np.linspace(0, 3000, num_bin+1)\n", + " elif 'phi' in var:\n", + " bins = np.linspace(0, 5, num_bin+1)\n", " else:\n", " bins = num_bin # default binning if variable type is not recognized\n", - "\n", - " if \"vbfpt\" in var:\n", - " bins = np.linspace(0, 500, num_bin + 1)\n", - " if \"vbfeta\" in var:\n", - " bins = np.linspace(-5, 5, num_bin + 1)\n", - " if \"vbfM\" in var:\n", - " bins = np.linspace(0, 100, num_bin + 1)\n", - " if \"vbfphi\" in var:\n", - " bins = np.linspace(-1, 5, num_bin + 1)\n", - "\n", - " weights = df_filtered[(\"weight\", 0)]\n", + " \n", + " if 'vbfpt' in var:\n", + " bins = np.linspace(0, 500, num_bin+1)\n", + " if 'vbfeta' in var:\n", + " bins = np.linspace(-5, 5, num_bin+1)\n", + " if 'vbfM' in var:\n", + " bins = np.linspace(0, 100, num_bin+1)\n", + " if 'vbfphi' in var:\n", + " bins = np.linspace(-1, 5, num_bin+1)\n", + " \n", + " weights = df_filtered[('weight', 0)]\n", "\n", " # Check if variable has two entries (0 and 1)\n", " if (var, 1) in df_filtered.columns:\n", " for i in [0, 1]:\n", " column = df_filtered[(var, i)]\n", - " ax.hist(\n", - " column,\n", - " bins=bins,\n", - " alpha=0.5,\n", - " label=f\"Jet {i+1}\",\n", - " density=True,\n", - " weights=weights,\n", - " )\n", + " ax.hist(column, bins=bins, alpha=0.5, label=f'Jet {i+1}',density=True,weights=weights)\n", " else: # Variable has only one entry (0)\n", " column = df_filtered[(var, 0)]\n", - " ax.hist(column, bins=bins, alpha=0.5, label=var, density=True, weights=weights)\n", + " ax.hist(column, bins=bins, alpha=0.5, label=var,density=True,weights=weights)\n", "\n", - " ax.set_xlabel(\"Value\")\n", - " ax.set_ylabel(\"Count\")\n", - " ax.set_title(f\"Distribution of {var} ({sort_type})\")\n", + " ax.set_xlabel('Value')\n", + " ax.set_ylabel('Count')\n", + " ax.set_title(f'Distribution of {var} ({sort_type})')\n", " ax.legend()\n", " ax.grid(True)\n", "\n", " # Remove unused subplots\n", - " for ax in col_axs[len(variables_dict[sort_type]) :]:\n", + " for ax in col_axs[len(variables_dict[sort_type]):]:\n", " fig.delaxes(ax)\n", "\n", " plt.tight_layout()\n", - " plt.show()" + " plt.show()\n" ] }, { @@ -970,9 +862,9 @@ } ], "source": [ - "variables = [\"ak8FatJetMass\", \"ak8FatJetPt\", \"DijetPt\", \"DijetMass\"]\n", - "df = pd.read_parquet(\"0-10.parquet\")\n", - "process_and_plot_all(df, num_bin=50, custom_variables=variables)\n", + "variables = ['ak8FatJetMass', 'ak8FatJetPt', 'DijetPt', 'DijetMass']\n", + "df = pd.read_parquet('0-10.parquet')\n", + "process_and_plot_all(df, num_bin=50,custom_variables=variables)\n", "#'Unsorted': ['vbfptUnsorted','vbfetaUnsorted','vbfphiUnsorted','vbfMUnsorted','nGoodVBFJetsUnsorted']" ] }, @@ -1082,23 +974,21 @@ "source": [ "def compare_filter_effects(df):\n", " # Variables for each sorting type\n", - " sort_types = [\"Unsorted\", \"Sortedpt\", \"SortedM\", \"Sortedeta\"]\n", + " sort_types = ['Unsorted', 'Sortedpt', 'SortedM', 'Sortedeta']\n", "\n", " # Different masks to apply\n", " masks = [\n", - " (\"No Muons\", df[(\"nGoodMuons\", 0)] == 0),\n", - " (\"No Electrons\", df[(\"nGoodElectrons\", 0)] == 0),\n", - " (\"tt BG mask\", df[(\"nGoodJets\", 0)] == 0),\n", + " ('No Muons', df[('nGoodMuons', 0)] == 0),\n", + " ('No Electrons', df[('nGoodElectrons', 0)] == 0),\n", + " ('tt BG mask', df[('nGoodJets', 0)] == 0)\n", " ]\n", "\n", " # Create an empty dataframe to store the results\n", - " results = pd.DataFrame(columns=[\"Sorting Method/Mask\", \"Weighted Number of Jets\"])\n", - "\n", - " # Include original DataFrame first\n", - " weighted_jet_count = df[(\"weight\", 0)].sum()\n", - " original_row = pd.DataFrame(\n", - " {\"Sorting Method/Mask\": [\"Original\"], \"Weighted Number of Jets\": [weighted_jet_count]}\n", - " )\n", + " results = pd.DataFrame(columns=['Sorting Method/Mask', 'Weighted Number of Jets'])\n", + " \n", + " # Include original DataFrame first\n", + " weighted_jet_count = df[('weight', 0)].sum()\n", + " original_row = pd.DataFrame({'Sorting Method/Mask': ['Original'], 'Weighted Number of Jets': [weighted_jet_count]})\n", " results = pd.concat([results, original_row], ignore_index=True)\n", "\n", " # Loop over sort types and masks\n", @@ -1109,28 +999,22 @@ " df_temp = df_temp[mask] # Apply the mask\n", "\n", " # 'At least 2 VBF Jets' mask is a function, apply it separately\n", - " df_temp = df_temp[df_temp[(f\"nGoodVBFJets{sort_type}\", 0)] >= 2]\n", - " mask_name = \"At least 2 VBF Jets\"\n", + " df_temp = df_temp[df_temp[(f'nGoodVBFJets{sort_type}', 0)] >= 2]\n", + " mask_name = 'At least 2 VBF Jets'\n", "\n", " # Store the number of remaining jets along with the corresponding sorting method and mask\n", " # Instead of counting rows, sum the weights\n", - " weighted_jet_count = df_temp[(\"weight\", 0)].sum()\n", - " new_row = pd.DataFrame(\n", - " {\n", - " \"Sorting Method/Mask\": [f\"{sort_type} - {mask_name}\"],\n", - " \"Weighted Number of Jets\": [weighted_jet_count],\n", - " }\n", - " )\n", + " weighted_jet_count = df_temp[('weight', 0)].sum()\n", + " new_row = pd.DataFrame({'Sorting Method/Mask': [f'{sort_type} - {mask_name}'], 'Weighted Number of Jets': [weighted_jet_count]})\n", " results = pd.concat([results, new_row], ignore_index=True)\n", "\n", " return results\n", "\n", - "\n", "# Now you can use this function and display the result as a pandas DataFrame:\n", "results = compare_filter_effects(df)\n", "display(results)\n", "\n", - "print(np.shape(df)[0])" + "print(np.shape(df)[0])\n" ] }, { @@ -1418,20 +1302,19 @@ "source": [ "import itertools\n", "\n", - "\n", "def compare_filter_effects(df):\n", " # Variables for each sorting type\n", - " sort_types = [\"Unsorted\", \"Sortedpt\", \"SortedM\", \"Sortedeta\"]\n", + " sort_types = ['Unsorted', 'Sortedpt', 'SortedM', 'Sortedeta']\n", "\n", " # Different masks to apply\n", " masks = [\n", - " (\"No Muons\", df[(\"nGoodMuons\", 0)] == 0),\n", - " (\"No Electrons\", df[(\"nGoodElectrons\", 0)] == 0),\n", - " (\"tt BG mask\", df[(\"nGoodJets\", 0)] == 0),\n", + " ('No Muons', df[('nGoodMuons', 0)] == 0),\n", + " ('No Electrons', df[('nGoodElectrons', 0)] == 0),\n", + " ('tt BG mask', df[('nGoodJets', 0)] == 0)\n", " ]\n", "\n", " # Create an empty dataframe to store the results\n", - " results = pd.DataFrame(columns=[\"Sorting Method/Mask\", \"Number of Jets\"])\n", + " results = pd.DataFrame(columns=['Sorting Method/Mask', 'Number of Jets'])\n", "\n", " # Loop over sort types\n", " for sort_type in sort_types:\n", @@ -1443,32 +1326,25 @@ "\n", " # Generate the mask combination name (sort_type + mask names)\n", " mask_combination_names = [mask_name for mask_name, _ in mask_combination]\n", - " mask_name = (\n", - " f\"{sort_type} - \" + \", \".join(mask_combination_names)\n", - " if mask_combination_names\n", - " else sort_type\n", - " )\n", - "\n", + " mask_name = f'{sort_type} - ' + ', '.join(mask_combination_names) if mask_combination_names else sort_type\n", + " \n", " # Apply each mask in the combination\n", " for _, mask in mask_combination:\n", " df_temp = df_temp[mask]\n", - "\n", + " \n", " # 'At least 2 VBF Jets' mask is a function, apply it separately\n", - " df_temp = df_temp[df_temp[(f\"nGoodVBFJets{sort_type}\", 0)] >= 2]\n", - " mask_name += \", At least 2 VBF Jets\"\n", + " df_temp = df_temp[df_temp[(f'nGoodVBFJets{sort_type}', 0)] >= 2]\n", + " mask_name += ', At least 2 VBF Jets'\n", "\n", " # Store the number of remaining jets along with the corresponding sorting method and mask\n", - " new_row = pd.DataFrame(\n", - " {\"Sorting Method/Mask\": [mask_name], \"Number of Jets\": [np.shape(df_temp)[0]]}\n", - " )\n", + " new_row = pd.DataFrame({'Sorting Method/Mask': [mask_name], 'Number of Jets': [np.shape(df_temp)[0]]})\n", " results = pd.concat([results, new_row], ignore_index=True)\n", "\n", " return results\n", "\n", - "\n", "# Now you can use this function and display the result as a pandas DataFrame:\n", "results = compare_filter_effects(df)\n", - "display(results)" + "display(results)\n" ] }, { @@ -1500,7 +1376,7 @@ } ], "source": [ - "df[(\"weight\", 0)]" + "df[('weight', 0)]" ] }, { diff --git a/src/HHbbVV/processors/bbVVSkimmer.py b/src/HHbbVV/processors/bbVVSkimmer.py index a5ecc507..8ec9137e 100644 --- a/src/HHbbVV/processors/bbVVSkimmer.py +++ b/src/HHbbVV/processors/bbVVSkimmer.py @@ -221,13 +221,6 @@ def process(self, events: ak.Array): skimmed_events = {} -<<<<<<< HEAD -======= - # jets_ak4 = '' # deal with these guys later. not sure but I think the type is fatjet afterward which breaks things - # if isVBFSearch: - # jets_ak4,jec_shifted_var_ak4 = get_jec_jets(events, year, isData, self.jecs,fatjets = False) - ->>>>>>> 219b36b8254c529b99a31b88fb9609023c060267 ######################### # Save / derive variables ######################### @@ -301,25 +294,11 @@ def process(self, events: ak.Array): **self.getDijetVars(ak8FatJetVars, bb_mask, mass_shift=label), } -<<<<<<< HEAD # VBF ak4 Jet vars (pt, eta, phi, M, nGoodJets) if isVBFSearch: ak4JetVars = { **self.getVBFVars(events,ak8FatJetVars,bb_mask, isGen="VBF_HHTobbVV" in dataset) } skimmed_events = {**skimmed_events, **ak4JetVars} - logger.warning(len(events)) - -======= - # VBF ak4 Jet vars (jetId,puId, pt, eta, phi, M) - # logger.warning(isVBFSearch) # note that this pads it to exactly 20 jets. maybe there is more. in the future I will make it just the best 2. I am doing this because to reshape we need np instead of ak. - if isVBFSearch: - ak4JetVars = { - **self.getVBFVars(events, ak8FatJetVars, bb_mask) - } # pad_val(jets_ak4[var], 20, axis=1) # consider using selection to make cuts for VBF instead of passing back boolean array or somth - # i can deal wit jet energy corrections later on. (we will isolate this to 2 vbf jets and then be able to pad). 550 might give issues ->>>>>>> 219b36b8254c529b99a31b88fb9609023c060267 - - skimmed_events = {**skimmed_events, **ak4JetVars} - logger.warning("did thingy!!!!") # this is also quite goofy looking + otherVars = { key: events[var.split("_")[0]]["_".join(var.split("_")[1:])].to_numpy() @@ -713,7 +692,6 @@ def process(self, events: ak.Array): def postprocess(self, accumulator): return accumulator -<<<<<<< HEAD def getVBFVars(self, events: ak.Array, @@ -768,75 +746,6 @@ def getVBFVars(self, electron_muon_overlap_mask = ~(ak.any(e_pairs_mask, axis=-1) | ak.any(m_pairs_mask, axis=-1)) -======= - - def getVBFVars( - self, - events: ak.Array, - ak8FatJetVars: Dict, - bb_mask: np.ndarray, - skim_vars: dict = None, - pt_shift: str = None, - mass_shift: str = None, - ): # NanoEventsArray not imported yet, may not be necessary? - """Computes selections on vbf jet candidates. Sorts jets by pt,delta eta,dijet mass. Computes a nGoodVBFJets thing return skim variables for the first two of the filtered guys and the nGoodVBFJets thingy""" - - # TODO implement use of pt_shift, mass_shift, skim_vars. Decide on best sorting and remove extras. - - vbfVars = {} - jets = events.Jet - - # AK4 jets definition - # The PF candidates associated to pileup vertices are removed from the - # 430 jet constituents using the charged hadron subtraction (CHS) algorithm [22]. - # 431 Jet energy and resolution corrections supplied by the JetMET POG are applied [19, 20], using - # 432 the tags indicated in Table 11. The L1FastJet, L2Relative, and L3Absolute corrections - # 433 for CHS jets are applied to data and simulation. The L2L3Residual corrections are applied - # 434 to the data. The selected jets are required to have pT > 25 GeV and to be within |η| < 4.7. The - # 435 AK4 jets are also required to satisfy the tight PF Jet identification criteria recommended by the - # 436 JetMET POG [21]. - # 437 In addition, jets with pT < 50 GeV are required to pass medium or tight working points of - # 438 the pileup ID discriminator [23]. Corrective scale factors for the pileup ID are also applied as - # 439 described in Section 9.2. - - ak4_jet_mask = ( - (jets.pt > 25) - & (np.abs(jets.eta) < 4.7) - & (jets.jetId != 1) - & ((jets.pt > 50) | ((jets.puId == 7) | (jets.puId == 7))) - ) - - # was charged hadron subtraction already performed? I think so before to the nano. not sure abt correctons but I think also. - - # then we need to filter based on the placement of the jets: - - # To identify the two tag jets, a collection of AK4 jets with charged hadron subtraction (CHS) with - # 743 pT > 25 GeV, |η| < 4.7, and other preselections detailed in Section 5.4 is considered. The VBF - # 744 tag jet candidates (j) must not overlap with reconstructed electrons, muons or the two Higgs - # 745 candidate AK8 jets: ∆R(j, e) > 0.4, ∆R(j, µ) > 0.4 and ∆R(j, AK8) > 1.2. For electrons and - # 746 muons, looser selection criteria than the ones used in lepton veto are applied: any reconstructed - # 747 electrons (muons) with pT > 5 (7) GeV are vetoed, without requiring additional identification - # 748 or isolation criteria. - - # reconstructing electrons + muons for delta R calculations. we need to first filter the electrons and muons so that only - # the low pt ones remain. Then we must calculate delta r with each of the possible vbfs and remove them based on that. - - # compute mask for electron/muon overlap - electrons, muons = events.Electron[events.Electron.pt < 5], events.Muon[events.Muon.pt < 7] - e_pairs = ak.cartesian([jets, electrons], nested=True) - e_pairs_mask = ( - np.abs(e_pairs.slot0.delta_r(e_pairs.slot1)) < 0.4 - ) # this may not work due to type. just compute as dist in phi eta plane - m_pairs = ak.cartesian([jets, muons], nested=True) - m_pairs_mask = np.abs(m_pairs.slot0.delta_r(m_pairs.slot1)) < 0.4 - # counts = [len(event) for event in jets] - # logger.warning(f"Jets total elements: {counts} len: {len(jets)}") - # counts = [len(event) for event in ak.any(e_pairs_mask, axis=-1)] - # logger.warning(f"Jets total elements: {counts} len: {len(ak.any(e_pairs_mask, axis=-1))}") - electron_muon_overlap_mask = ~( - ak.any(e_pairs_mask, axis=-1) | ak.any(m_pairs_mask, axis=-1) - ) # both should be true initially if electron or muons overlap ->>>>>>> 219b36b8254c529b99a31b88fb9609023c060267 # Fatjet Definition for ∆R calculations: same definition as in getDijetVars() (not included so we can study its effects in the output.) ptlabel = pt_shift if pt_shift is not None else "" @@ -860,7 +769,6 @@ def getVBFVars( # "M": ak8FatJetVars[f"ak8FatJetParticleNetMass{mlabel}"][~bb_mask], } ) -<<<<<<< HEAD fatjet_overlap_mask = (np.abs(jets.delta_r(bbJet)) > 1.2) & (np.abs(jets.delta_r(VVJet)) > 0.8) # this might not work due to types @@ -997,180 +905,6 @@ def compute_spatial_part(E, px, py, pz, boost_vector, boost_vector_dot_product): "M": ak.flatten(pad_val(vbfJets_sorted_pt[:,0:1].mass, 1, axis=1)), } ) -======= - - fatjet_overlap_mask = (np.abs(jets.delta_r(bbJet)) > 1.2) & ( - np.abs(jets.delta_r(VVJet)) > 1.2 - ) # this might not work due to types - - # compute n_good_vbf_jets + incorporate eta_jj > 4.0 - vbfJets_mask = ak4_jet_mask & electron_muon_overlap_mask & fatjet_overlap_mask - vbfJets = jets[vbfJets_mask] - - # Generating the three different sorting methods: (pt, dijet eta, dijet mass). For each we keep the two best - vbfJets_sorted_pt = vbfJets[ak.argsort(vbfJets.pt, ascending=False)] - vbfJets_sorted_pt = ak.pad_none( - vbfJets_sorted_pt, 2, clip=True - ) # this is the only which does not guarantee two guys. in the other sorts, the entries are specifically None. - - # dijet eta sorting - jet_pairs = ak.combinations(vbfJets, 2, axis=1, fields=["j1", "j2"]) - delta_eta = np.abs(jet_pairs.j1.eta - jet_pairs.j2.eta) - eta_sorted_pairs = jet_pairs[ - ak.argsort(delta_eta, axis=1, ascending=False) - ] # picks the two furthest jets. - eta_first_pairs = ak.firsts(eta_sorted_pairs, axis=1) - eta_sorted_mask = ak.any( - (vbfJets[:, :, None].eta == eta_first_pairs.j1.eta) - | (vbfJets[:, :, None].eta == eta_first_pairs.j2.eta), - axis=2, - ) - vbfJets_sorted_eta = vbfJets[eta_sorted_mask] - - # dijet mass sorting - jj = jet_pairs.j1 + jet_pairs.j2 - mass_sorted_pairs = jet_pairs[ - ak.argsort(jj.mass, axis=1, ascending=False) - ] # picks the two furthest jets. - mass_first_pairs = ak.firsts(mass_sorted_pairs, axis=1) - mass_sorted_mask = ak.any( - (vbfJets[:, :, None].mass == mass_first_pairs.j1.mass) - | (vbfJets[:, :, None].mass == mass_first_pairs.j2.mass), - axis=2, - ) - vbfJets_sorted_mass = vbfJets[mass_sorted_mask] - - # Compute dijet eta and dijet mass cuts - jj_sorted_mass = ( - mass_first_pairs.j1 + mass_first_pairs.j2 - ) # we update dijet since the previous one had many per event. this should be one number per event. - mass_jj_cut_sorted_mass = jj_sorted_mass.mass > 500 - eta_jj_cut_sorted_mass = np.abs(mass_first_pairs.j1.eta - mass_first_pairs.j2.eta) > 4.0 - vbfJets_mask_sorted_mass = vbfJets_mask * mass_jj_cut_sorted_mass * eta_jj_cut_sorted_mass - - jj_sorted_eta = eta_first_pairs.j1 + eta_first_pairs.j2 - mass_jj_cut_sorted_eta = jj_sorted_eta.mass > 500 - eta_jj_cut_sorted_eta = np.abs(eta_first_pairs.j1.eta - eta_first_pairs.j2.eta) > 4.0 - vbfJets_mask_sorted_eta = vbfJets_mask * mass_jj_cut_sorted_eta * eta_jj_cut_sorted_eta - - # here is a really slow way to compute pt mass - # pt_jet_pairs = ak.combinations(vbfJets_sorted_pt, 2, axis=1, fields=["j1", "j2"]) # we have to compute the pairs of pt to calculate the mass. - # delta_pt = np.abs(pt_jet_pairs.j1.pt + pt_jet_pairs.j2.eta) - # pt_sorted_pairs = pt_jet_pairs[ak.argsort(delta_pt, axis=1,ascending= False)] # picks the two furthest jets. - # pt_first_pairs = ak.firsts(pt_sorted_pairs, axis=1) - # jj_sorted_pt = pt_first_pairs.j1 + pt_first_pairs.j2 - # print(jj_sorted_pt.mass) - - # pt sorted eta and dijet mass mask - jj_sorted_pt = vbfJets_sorted_pt[:, 0:1] + vbfJets_sorted_pt[:, 1:2] - mass_jj_cut_sorted_pt = jj_sorted_pt.mass > 500 - eta_jj_cut_sorted_pt = ( - np.abs(vbfJets_sorted_pt[:, 0:1].eta - vbfJets_sorted_pt[:, 1:2].eta) > 4.0 - ) - vbfJets_mask_sorted_pt = vbfJets_mask * mass_jj_cut_sorted_pt * eta_jj_cut_sorted_pt - - n_good_vbf_jets = ak.fill_none( - ak.sum(vbfJets_mask, axis=1), 0 - ) # * eta_jj_mask * mass_jj_mask # filters out the events where the vbf jets are too close and mass too small., May need to convert to 0, 1 array instead of mask. - n_good_vbf_jets_sorted_pt = ak.fill_none(ak.sum(vbfJets_mask_sorted_pt, axis=1), 0) - n_good_vbf_jets_sorted_mass = ak.fill_none(ak.sum(vbfJets_mask_sorted_mass, axis=1), 0) - n_good_vbf_jets_sorted_eta = ak.fill_none(ak.sum(vbfJets_mask_sorted_eta, axis=1), 0) - - # logging.warning(f"Jets eta {jets.eta}") - # logging.warning(f"Jets pt {jets.pt}") - - GEN_FLAGS = ["fromHardProcess"] - vbfGenJets = events.GenPart[events.GenPart.hasFlags(["isHardProcess"])][:, 4:6] - - # logging.warning(f"Gen Jets eta {vbfGenJets.eta}") - # logging.warning(f"Gen Jets mass {vbfGenJets.mass}") - # logging.warning(f"Gen Jets pt {vbfGenJets.pt}") - # logging.warning(f"Gen Jets pdgId {vbfGenJets.pdgId}") - - # self.getVBFGenMatchCount(events,jets) - # self.getVBFGenMatchCount(events,vbfJets) - # self.getVBFGenMatchCount(events,vbfJets[ak.argsort(vbfJets.btagDeepFlavCvL,ascending = False)][:,0:2]) - # self.getVBFGenMatchCount(events,vbfJets_sorted_pt) - # self.getVBFGenMatchCount(events,vbfJets_sorted_mass) - # self.getVBFGenMatchCount(events,vbfJets_sorted_eta) - - # logging.warning(np.sum(ak.sum(ak4_jet_mask, axis=1).to_numpy())) #," ," and final: {",np.sum(ak.sum(vbfJets_mask, axis=1).to_numpy())," compared to initial: {",np.sum(ak.sum(jets, axis=1).to_numpy())) - # logging.warning(np.sum(ak.sum(electron_muon_overlap_mask, axis=1).to_numpy())) - # logging.warning(np.sum(ak.sum(fatjet_overlap_mask, axis=1).to_numpy())) - # logging.warning(np.sum(ak.sum(vbfJets_mask, axis=1).to_numpy())) - # logging.warning(ak.sum(ak.num(jets, axis=1) )) - - # dijet mass must be greater than 500 - # dijet = vbfJets[:,0:1] + vbfJets[:,1:2] - # mass_jj_mask = dijet.mass > 500 - # eta_jj_mask = (np.abs(vbfJets[:,0:1].eta -vbfJets[:,1:2].eta) > 4.0) - # vbfJet1, vbfJet2 = vbfJets[:,0],vbfJets[:,1] - - vbfVars[f"vbfptGen"] = pad_val(vbfGenJets.pt, 2, axis=1) - vbfVars[f"vbfetaGen"] = pad_val(vbfGenJets.eta, 2, axis=1) - vbfVars[f"vbfphiGen"] = pad_val(vbfGenJets.phi, 2, axis=1) - vbfVars[f"vbfMGen"] = pad_val(vbfGenJets.mass, 2, axis=1) - - vbfVars[f"vbfptSortedRand"] = pad_val( - vbfJets[ak.argsort(vbfJets.btagDeepFlavCvL, ascending=False)][:, 0:2].pt, 2, axis=1 - ) - vbfVars[f"vbfetaSortedRand"] = pad_val( - vbfJets[ak.argsort(vbfJets.btagDeepFlavCvL, ascending=False)][:, 0:2].eta, 2, axis=1 - ) - vbfVars[f"vbfphiSortedRand"] = pad_val( - vbfJets[ak.argsort(vbfJets.btagDeepFlavCvL, ascending=False)][:, 0:2].phi, 2, axis=1 - ) - vbfVars[f"vbfMSortedRand"] = pad_val( - vbfJets[ak.argsort(vbfJets.btagDeepFlavCvL, ascending=False)][:, 0:2].mass, 2, axis=1 - ) - - vbfVars[f"vbfptSortedpt"] = pad_val(vbfJets_sorted_pt.pt, 2, axis=1) - vbfVars[f"vbfetaSortedpt"] = pad_val(vbfJets_sorted_pt.eta, 2, axis=1) - vbfVars[f"vbfphiSortedpt"] = pad_val(vbfJets_sorted_pt.phi, 2, axis=1) - vbfVars[f"vbfMSortedpt"] = pad_val(vbfJets_sorted_pt.mass, 2, axis=1) - - vbfVars[f"vbfptSortedM"] = pad_val(vbfJets_sorted_mass.pt, 2, axis=1) - vbfVars[f"vbfetaSortedM"] = pad_val(vbfJets_sorted_mass.eta, 2, axis=1) - vbfVars[f"vbfphiSortedM"] = pad_val(vbfJets_sorted_mass.phi, 2, axis=1) - vbfVars[f"vbfMSortedM"] = pad_val(vbfJets_sorted_mass.mass, 2, axis=1) - - vbfVars[f"vbfptSortedeta"] = pad_val(vbfJets_sorted_eta.pt, 2, axis=1) - vbfVars[f"vbfetaSortedeta"] = pad_val(vbfJets_sorted_eta.eta, 2, axis=1) - vbfVars[f"vbfphiSortedeta"] = pad_val(vbfJets_sorted_eta.phi, 2, axis=1) - vbfVars[f"vbfMSortedeta"] = pad_val(vbfJets_sorted_eta.mass, 2, axis=1) - - vbfVars[ - f"nGoodVBFJetsUnsorted" - ] = ( - n_good_vbf_jets.to_numpy() - ) # the original one does not have jj cuts since it assumes no sorting. - vbfVars[f"nGoodVBFJetsSortedpt"] = n_good_vbf_jets_sorted_pt.to_numpy() - vbfVars[f"nGoodVBFJetsSortedM"] = n_good_vbf_jets_sorted_mass.to_numpy() - vbfVars[f"nGoodVBFJetsSortedeta"] = n_good_vbf_jets_sorted_eta.to_numpy() - - # this is all very ugly but I just want it to work first. later we can decide to pack both jets together. also we can document better. Also we can copy the form of getdijet variables function. also we can implement correctons somehow. unsure of when they apply. - - return vbfVars - - def getVBFGenMatchCount(self, events: ak.Array, jets: ak.Array): - """Computes number of matching jets per event and returns this list. Was used for debugging""" - # NOTE THIS IS ONLY FOR GENERATED SAMPLES THAT THIS WILL RUN. Will delete later or perhaps ammend to the GenSelections Script - - # for each sorting method, we can now compute delta R with the bb jet candidates and calculate if we got it right. - # We will generate a mask that will capture only the vbf jets that match with atleast one of the gen jets - GEN_FLAGS = ["fromHardProcess", "isLastCopy"] - vbfGenJets = events.GenPart[ - ((abs(events.GenPart.pdgId) == 24)) * events.GenPart.hasFlags(GEN_FLAGS) - ] - vbfGenJets = events.GenPart[events.GenPart.hasFlags(["isHardProcess"])][:, 4:6] - - # Create pairs of jets from vbfJets_sorted_pt and two generator jets - jet_pairs = ak.cartesian({"reco": jets, "gen": vbfGenJets[:, 0:2]}) - - # Calculate delta eta and delta phi for each pair - delta_eta = jet_pairs["reco"].eta - jet_pairs["gen"].eta - delta_phi = np.pi - np.abs(np.abs(jet_pairs["reco"].phi - jet_pairs["gen"].phi) - np.pi) ->>>>>>> 219b36b8254c529b99a31b88fb9609023c060267 vbf2 = vector.array( { @@ -1242,7 +976,6 @@ def getVBFGenMatchCount(self, events: ak.Array, jets: ak.Array): costheta2 = pz2_boosted / np.sqrt(px2_boosted**2 + py2_boosted**2 + pz2_boosted**2) -<<<<<<< HEAD vbfVars[f"vbf_cos1_j1"] = np.abs(costheta1) # may have to treat same as nGoodVBFJets vbfVars[f"vbf_cos1_j2"] = np.abs(costheta2) #print(f"\nTime taken computing theta1 and 2: {time.time()-start_time:.6f} seconds") @@ -1278,48 +1011,6 @@ def getVBFGenMatchCount(self, events: ak.Array, jets: ak.Array): -======= - # Apply a mask for a low delta R value - mask_low_delta_R = delta_R < 0.4 - - # Find events where at least one of the pairs of jets has a low delta R - num_per_event = ak.sum(mask_low_delta_R, axis=-1) - - # logging.warning(f'Number of True values: {num_per_event}') - for i in range(3): - percentage = ak.sum(num_per_event == i) / len(num_per_event) * 100 - logging.warning( - f"Percentage of events with {i} true values: {percentage:.2f}% {len(num_per_event)} and {ak.sum(num_per_event == i)}" - ) - - # print kinematic properties of matches: - # Apply the low delta R mask to the jet pairs - matched_jet_pairs = jet_pairs[mask_low_delta_R] - - for i in range(3): - # Select events with the specified number of matches - event_mask = ak.sum(mask_low_delta_R, axis=-1) == i - - # For these events, get the corresponding reco and gen jets - selected_pairs = matched_jet_pairs[event_mask] - reco_jets = selected_pairs["reco"] - gen_jets = selected_pairs["gen"] - - # Calculate and print the average and standard deviation for mass, pt, phi and eta of both reco and gen jets - for name, jets in [("reco", reco_jets), ("gen", gen_jets)]: - avg_mass = ak.mean(jets.mass, axis=None) - std_mass = ak.std(jets.mass, axis=None) - avg_pt = ak.mean(jets.pt, axis=None) - std_pt = ak.std(jets.pt, axis=None) - avg_phi = ak.mean(jets.phi, axis=None) - std_phi = ak.std(jets.phi, axis=None) - avg_eta = ak.mean(jets.eta, axis=None) - std_eta = ak.std(jets.eta, axis=None) - - logging.warning( - f"For {i} matches, {name} jet average mass: {avg_mass:.1f}, std: {std_mass:.1f}, average pt: {avg_pt:.1f}, std: {std_pt:.1f}, average phi: {avg_phi:.1f}, std: {std_phi:.1f}, average eta: {avg_eta:.1f}, std: {std_eta:.1f}" - ) ->>>>>>> 219b36b8254c529b99a31b88fb9609023c060267 def getDijetVars( self, ak8FatJetVars: Dict, bb_mask: np.ndarray, pt_shift: str = None, mass_shift: str = None diff --git a/src/condor/submit.py b/src/condor/submit.py index 02df7ec5..d6f4b860 100644 --- a/src/condor/submit.py +++ b/src/condor/submit.py @@ -43,8 +43,9 @@ def main(args): elif args.site == "ucsd": t2_local_prefix = "/ceph/cms/" t2_prefix = "root://redirector.t2.ucsd.edu:1095" - proxy = "/home/users/rkansal/x509up_u31735" - + proxy = "/home/users/annava/projects/HHbbVV/test" # edited this to be this for Andres: grid-proxy-init -debug -verify + print(f'given proxy?',proxy) # debug + username = os.environ["USER"] local_dir = f"condor/{args.processor}/{args.tag}" homedir = f"/store/user/{username}/bbVV/{args.processor}/"