Skip to content

Commit

Permalink
new tagger and inference
Browse files Browse the repository at this point in the history
  • Loading branch information
rkansal47 committed Jun 28, 2023
1 parent 85c9f55 commit 153deaf
Show file tree
Hide file tree
Showing 11 changed files with 257 additions and 108 deletions.
190 changes: 130 additions & 60 deletions src/HHbbVV/processors/TaggerInference.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,103 @@ def _do_inference(
return request.as_numpy(out_name)


def _derive_vars_new_tagger(
    jet_outputs: np.ndarray, jet_label: str, all_outputs: bool, tagger_vars: dict
) -> dict:
    """Build the per-jet tagger variables for the new tagger's output layout.

    Args:
        jet_outputs: tagger outputs for one jet collection. When non-empty it is
            indexed as a 2D array ``[jet, output_column]``; a zero-length input
            yields empty arrays for every variable.
        jet_label: prefix for the output keys (keys are
            ``f"{jet_label}FatJetParTMD_<name>"``).
        all_outputs: if True, additionally store every raw output column under
            the corresponding name in ``tagger_vars["output_names"]``.
        tagger_vars: tagger configuration; only ``"output_names"`` is read, and
            only when ``all_outputs`` is True.

    Returns:
        dict mapping variable name to a 1D array with one entry per jet
        (``np.array([])`` for every key when ``jet_outputs`` is empty).
    """
    prefix = f"{jet_label}FatJetParTMD_"
    # Column 17 + i holds the probH<decay> score for the i-th entry below.
    higgs_decays = ("bb", "cc", "ss", "qq", "bc", "bs", "cs", "gg")
    has_jets = len(jet_outputs) > 0

    if has_jets:
        # NOTE(review): the hard-coded column ranges presumably match the class
        # ordering in the new tagger's output_names (first 17 columns -> top,
        # 309:314 -> QCD) -- confirm against the model's JSON config.
        pvars = {
            f"{prefix}probQCD": np.sum(jet_outputs[:, 309:314], axis=1),
            f"{prefix}probT": np.sum(jet_outputs[:, :17], axis=1),
        }
        for offset, decay in enumerate(higgs_decays):
            pvars[f"{prefix}probH{decay}"] = jet_outputs[:, 17 + offset]
    else:
        # No jets: emit the same keys (in the same order) with empty arrays so
        # downstream code always sees a consistent set of variables.
        pvars = {
            f"{prefix}probQCD": np.array([]),
            f"{prefix}probT": np.array([]),
        }
        for decay in higgs_decays:
            pvars[f"{prefix}probH{decay}"] = np.array([])

    if all_outputs:
        # Raw columns are written after the derived ones; on a name clash the
        # raw column's value wins while the key keeps its original position.
        for col, output_name in enumerate(tagger_vars["output_names"]):
            pvars[f"{prefix}{output_name}"] = (
                jet_outputs[:, col] if has_jets else np.array([])
            )

    return pvars


def _derive_vars(
    jet_outputs: np.ndarray, jet_label: str, all_outputs: bool, tagger_vars: dict
) -> dict:
    """Build the per-jet tagger variables and HWW discriminants.

    Args:
        jet_outputs: tagger outputs for one jet collection. When non-empty it is
            indexed as a 2D array ``[jet, output_column]``; a zero-length input
            yields empty arrays for every variable.
        jet_label: prefix for the output keys (keys are
            ``f"{jet_label}FatJetParTMD_<name>"``).
        all_outputs: if True, additionally store every raw output column under
            the corresponding name in ``tagger_vars["output_names"]``.
        tagger_vars: tagger configuration; only ``"output_names"`` is read, and
            only when ``all_outputs`` is True.

    Returns:
        dict mapping variable name to a 1D array with one entry per jet
        (``np.array([])`` for every key when ``jet_outputs`` is empty).
    """
    prefix = f"{jet_label}FatJetParTMD_"
    has_jets = len(jet_outputs) > 0

    if has_jets:
        # NOTE(review): the hard-coded column ranges presumably follow the
        # class ordering of the tagger's output_names -- confirm against the
        # model's JSON config.
        prob_qcd = np.sum(jet_outputs[:, 23:28], axis=1)
        prob_hww3q = np.sum(jet_outputs[:, 0:3], axis=1)
        prob_hww4q = np.sum(jet_outputs[:, 3:6], axis=1)
        prob_top = np.sum(jet_outputs[:, 28:37], axis=1)

        # Combined HWW score, shared by the numerators and denominators below.
        prob_hww = prob_hww3q + prob_hww4q

        pvars = {
            f"{prefix}probQCD": prob_qcd,
            f"{prefix}probHWW3q": prob_hww3q,
            f"{prefix}probHWW4q": prob_hww4q,
            f"{prefix}probT": prob_top,
            # HWW vs QCD discriminant: HWW / (HWW + QCD)
            f"{prefix}THWW4q": prob_hww / (prob_hww + prob_qcd),
            # HWW vs QCD + top discriminant: HWW / (HWW + QCD + T)
            f"{prefix}THWWvsT": prob_hww / (prob_hww + prob_qcd + prob_top),
        }
    else:
        # No jets: emit the same keys (in the same order) with empty arrays so
        # downstream code always sees a consistent set of variables.
        derived_names = (
            "probQCD",
            "probHWW3q",
            "probHWW4q",
            "probT",
            "THWW4q",
            "THWWvsT",
        )
        pvars = {f"{prefix}{name}": np.array([]) for name in derived_names}

    if all_outputs:
        # Raw columns are written after the derived ones; on a name clash the
        # raw column's value wins while the key keeps its original position.
        for col, output_name in enumerate(tagger_vars["output_names"]):
            pvars[f"{prefix}{output_name}"] = (
                jet_outputs[:, col] if has_jets else np.array([])
            )

    return pvars


def runInferenceTriton(
tagger_resources_path: str,
events: NanoEventsArray,
Expand All @@ -545,6 +642,7 @@ def runInferenceTriton(
ak15: bool = False,
all_outputs: bool = False,
jet_label: str = None,
new_tagger: bool = False,
) -> dict:
"""Runs inference with the triton server.
Expand Down Expand Up @@ -577,7 +675,14 @@ def runInferenceTriton(
if jet_label is None:
jet_label = "ak15" if ak15 else "ak8"

with open(f"{tagger_resources_path}/triton_config_{'ak8' if not ak15 else 'ak15'}.json") as f:
if new_tagger:
config_name = "triton_config_new_tagger_ak8"
elif not ak15:
config_name = "triton_config_ak8"
else:
config_name = "triton_config_ak15"

with open(f"{tagger_resources_path}/{config_name}.json") as f:
triton_config = json.load(f)

with open(f"{tagger_resources_path}/{triton_config['model_name']}.json") as f:
Expand Down Expand Up @@ -633,74 +738,39 @@ def runInferenceTriton(
for jet_idx in range(num_jets):
print(f"Running inference for Jet {jet_idx + 1}")
start = time.time()
tagger_outputs.append(triton_model(tagger_inputs[jet_idx]))
out = triton_model(tagger_inputs[jet_idx])
time_taken = time.time() - start
print(f"Inference took {time_taken:.1f}s")

pnet_vars_list = []

for jet_idx in range(num_jets):
if len(tagger_outputs[jet_idx]):
derived_vars = {
f"{jet_label}FatJetParTMD_probQCD": np.sum(
tagger_outputs[jet_idx][:, 23:28], axis=1
),
f"{jet_label}FatJetParTMD_probHWW3q": np.sum(
tagger_outputs[jet_idx][:, 0:3], axis=1
),
f"{jet_label}FatJetParTMD_probHWW4q": np.sum(
tagger_outputs[jet_idx][:, 3:6], axis=1
),
f"{jet_label}FatJetParTMD_probT": np.sum(tagger_outputs[jet_idx][:, 28:37], axis=1),
}

derived_vars[f"{jet_label}FatJetParTMD_THWW4q"] = (
derived_vars[f"{jet_label}FatJetParTMD_probHWW3q"]
+ derived_vars[f"{jet_label}FatJetParTMD_probHWW4q"]
) / (
derived_vars[f"{jet_label}FatJetParTMD_probHWW3q"]
+ derived_vars[f"{jet_label}FatJetParTMD_probHWW4q"]
+ derived_vars[f"{jet_label}FatJetParTMD_probQCD"]
)

derived_vars[f"{jet_label}FatJetParTMD_THWWvsT"] = (
derived_vars[f"{jet_label}FatJetParTMD_probHWW3q"]
+ derived_vars[f"{jet_label}FatJetParTMD_probHWW4q"]
) / (
derived_vars[f"{jet_label}FatJetParTMD_probHWW3q"]
+ derived_vars[f"{jet_label}FatJetParTMD_probHWW4q"]
+ derived_vars[f"{jet_label}FatJetParTMD_probQCD"]
+ derived_vars[f"{jet_label}FatJetParTMD_probT"]
if triton_config["num_reg"] > 0:
# separate class and regression outputs
out_cls, out_reg = (
out[:, : -triton_config["num_reg"]],
out[:, -triton_config["num_reg"] :],
)
# apply softmax if not already included in model
if triton_config["softmax"] == "False":
out_cls = softmax(out_cls, axis=1)

pnet_vars_all = {}
out = np.concatenate((out_cls, out_reg), axis=1)
elif triton_config["softmax"] == "False":
out = softmax(out, axis=1)

if all_outputs:
for i, output_name in enumerate(tagger_vars["output_names"]):
pnet_vars_all[f"{jet_label}FatJetParTMD_{output_name}"] = tagger_outputs[
jet_idx
][:, i]
tagger_outputs.append(out)

pvars = {**derived_vars, **pnet_vars_all}
pnet_vars_list.append(pvars)
pnet_vars_list = []

for jet_idx in range(num_jets):
if new_tagger:
pnet_vars_list.append(
_derive_vars_new_tagger(
tagger_outputs[jet_idx], jet_label, all_outputs, tagger_vars
)
)
else:
derived_vars = {
f"{jet_label}FatJetParTMD_probQCD": np.array([]),
f"{jet_label}FatJetParTMD_probHWW3q": np.array([]),
f"{jet_label}FatJetParTMD_probHWW4q": np.array([]),
f"{jet_label}FatJetParTMD_probT": np.array([]),
f"{jet_label}FatJetParTMD_THWW4q": np.array([]),
f"{jet_label}FatJetParTMD_THWWvsT": np.array([]),
}
pnet_vars_all = {}

if all_outputs:
for i, output_name in enumerate(tagger_vars["output_names"]):
pnet_vars_all[f"{jet_label}FatJetParTMD_{output_name}"] = np.array([])

pvars = {**derived_vars, **pnet_vars_all}
pnet_vars_list.append(pvars)
pnet_vars_list.append(
_derive_vars(tagger_outputs[jet_idx], jet_label, all_outputs, tagger_vars)
)

print(f"Total time taken: {time.time() - total_start:.1f}s")

Expand Down
3 changes: 2 additions & 1 deletion src/HHbbVV/processors/TaggerInputSkimmer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from coffea.processor import ProcessorABC, dict_accumulator
from coffea.analysis_tools import PackedSelection

from .utils import add_selection_no_cutflow
from .utils import add_selection_no_cutflow, PAD_VAL
from .TaggerInference import (
get_pfcands_features,
get_svs_features,
Expand Down Expand Up @@ -487,6 +487,7 @@ def process(self, events: ak.Array):
events[selection.all(*selection.names)],
num_jets=self.num_jets,
ak15=False,
new_tagger=True,
)

for jet_idx in range(self.num_jets):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,9 @@
"label_QCD_cc",
"label_QCD_b",
"label_QCD_c",
"label_QCD_others"
"label_QCD_others",
"mreg1",
"mreg2"
],
"input_names": [
"pf_features",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
{
"model_name": "Dec22_ak8_MD_vminclv2ParT_manual_fixwrap",
"model_url": "triton+grpc://67.58.49.52:8001/ak8_MD_vminclv2ParT_manual_fixwrap/1",
"batch_size": 192
"batch_size": 192,
"softmax": "True",
"num_reg": 0
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
{
"model_name": "2023May30_ak8_MD_inclv8_part_2reg_manual",
"model_url": "triton+grpc://67.58.49.52:8001/ak8_MD_inclv8_part_2reg_manual/1",
"batch_size": 192
"batch_size": 192,
"softmax": "False",
"num_reg": 2
}
2 changes: 1 addition & 1 deletion src/HHbbVV/scale_factors/top_reweighting.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1274,7 +1274,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
"version": "3.10.8"
},
"orig_nbformat": 4,
"vscode": {
Expand Down
4 changes: 2 additions & 2 deletions src/HHbbVV/triton/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,6 @@ python weaver/train.py --train-mode hybrid \
For the 2023May30 model I had to:

1. Bypass the if statement here https://github.com/colizz/weaver-core-dev/blob/d038cd502d1b4a8ab3deefa9c3022bd3b812baf5/weaver/networks/ParticleTransformer2023.py#L586 (just `return output` instead) - torch complained about comparing a tensor to a Python boolean.
2. Rewrite this einsum statement https://github.com/colizz/weaver-core-dev/blob/d038cd502d1b4a8ab3deefa9c3022bd3b812baf5/weaver/networks/ParticleTransformer2023.py#L448C14-L448C14 -> https://github.com/colizz/weaver-core-dev/blob/d038cd502d1b4a8ab3deefa9c3022bd3b812baf5/weaver/networks/example_ParticleTransformerTagger_hybrid_outputWithHidNeurons.py#L337 - onnx opset v11 doesn't support einsum.

2. Remove the softmax part https://github.com/colizz/weaver-core-dev/blob/d038cd502d1b4a8ab3deefa9c3022bd3b812baf5/weaver/networks/ParticleTransformer2023.py#L583-L584 (doesn't make sense since regression outputs are included).
3. Rewrite this einsum statement https://github.com/colizz/weaver-core-dev/blob/d038cd502d1b4a8ab3deefa9c3022bd3b812baf5/weaver/networks/ParticleTransformer2023.py#L448C14-L448C14 -> https://github.com/colizz/weaver-core-dev/blob/d038cd502d1b4a8ab3deefa9c3022bd3b812baf5/weaver/networks/example_ParticleTransformerTagger_hybrid_outputWithHidNeurons.py#L337 - onnx opset v11 doesn't support einsum.

Loading

0 comments on commit 153deaf

Please sign in to comment.