From 4fe949d5a3bc5b51652c3d1958285e86b2c86123 Mon Sep 17 00:00:00 2001
From: Dmitry Kalinkin
Date: Wed, 18 Dec 2024 23:57:39 -0500
Subject: [PATCH 1/3] calo_pid: pass input files via a list file

Add a calo_pid_input_list rule that writes the expanded list of
eicrecon output files for one PARTICLE, one path per line, to
listing/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}.lst.  The calo_pid rule
now takes the e- and pi- listings as its inputs instead of expanding
the individual files itself.

In the notebook, INPUT_ELECTRONS and INPUT_PIONS are now treated as
paths to such listings and are read through a new readlist() helper.
readlist() strips trailing whitespace from every line and skips empty
lines, so a blank line in a listing is never passed to uproot as a
file name.
---
 benchmarks/calo_pid/Snakefile    | 23 +++++++++++++----------
 benchmarks/calo_pid/calo_pid.org | 13 +++++++++----
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/benchmarks/calo_pid/Snakefile b/benchmarks/calo_pid/Snakefile
index a93cfc4..31b0298 100644
--- a/benchmarks/calo_pid/Snakefile
+++ b/benchmarks/calo_pid/Snakefile
@@ -64,22 +64,25 @@ exec env DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \
 """
 
 
-rule calo_pid:
+rule calo_pid_input_list:
     input:
         electrons=expand(
-            "sim_output/calo_pid/{{DETECTOR_CONFIG}}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root",
-            PARTICLE=["e-"],
-            ENERGY=["100MeVto20GeV"],
-            PHASE_SPACE=["130to177deg"],
-            INDEX=range(100),
-        ),
-        pions=expand(
-            "sim_output/calo_pid/{{DETECTOR_CONFIG}}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root",
-            PARTICLE=["pi-"],
+            "sim_output/calo_pid/{{DETECTOR_CONFIG}}/{{PARTICLE}}/{ENERGY}/{PHASE_SPACE}/{{PARTICLE}}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root",
             ENERGY=["100MeVto20GeV"],
             PHASE_SPACE=["130to177deg"],
             INDEX=range(100),
         ),
+    output:
+        "listing/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}.lst",
+    run:
+        with open(output[0], "wt") as fp:
+            fp.write("\n".join(input))
+
+
+rule calo_pid:
+    input:
+        electrons="listing/calo_pid/{DETECTOR_CONFIG}/e-.lst",
+        pions="listing/calo_pid/{DETECTOR_CONFIG}/pi-.lst",
         matplotlibrc=".matplotlibrc",
         script="benchmarks/calo_pid/calo_pid.py",
     output:
diff --git a/benchmarks/calo_pid/calo_pid.org b/benchmarks/calo_pid/calo_pid.org
index a46aec0..965af14 100644
--- a/benchmarks/calo_pid/calo_pid.org
+++ b/benchmarks/calo_pid/calo_pid.org
@@ -32,8 +32,8 @@ vector.register_awkward()
 #+begin_src jupyter-python :results silent
 DETECTOR_CONFIG=os.environ.get("DETECTOR_CONFIG")
 PLOT_TITLE=os.environ.get("PLOT_TITLE")
-INPUT_PIONS=os.environ.get("INPUT_PIONS", "").split(" ")
-INPUT_ELECTRONS=os.environ.get("INPUT_ELECTRONS", "").split(" ")
+INPUT_PIONS=os.environ.get("INPUT_PIONS")
+INPUT_ELECTRONS=os.environ.get("INPUT_ELECTRONS")
 
 output_dir=Path(os.environ.get("OUTPUT_DIR", "./"))
 output_dir.mkdir(parents=True, exist_ok=True)
@@ -75,8 +75,13 @@ def filter_pointing(events):
     cond = (part_momentum.eta[:,0] > -3.5) & (part_momentum.eta[:,0] < -2.)
     return events[cond]
 
-e = filter_pointing(uproot.concatenate({filename: "events" for filename in INPUT_ELECTRONS}, filter_name=["MCParticles.*", "*EcalEndcapN*"]))
-pi = filter_pointing(uproot.concatenate({filename: "events" for filename in INPUT_PIONS}, filter_name=["MCParticles.*", "*EcalEndcapN*"]))
+def readlist(path):
+    with open(path, "rt") as fp:
+        paths = [name for name in map(str.rstrip, fp) if name]
+    return paths
+
+e = filter_pointing(uproot.concatenate({filename: "events" for filename in readlist(INPUT_ELECTRONS)}, filter_name=["MCParticles.*", "*EcalEndcapN*"]))
+pi = filter_pointing(uproot.concatenate({filename: "events" for filename in readlist(INPUT_PIONS)}, filter_name=["MCParticles.*", "*EcalEndcapN*"]))
 
 e_train = e[:len(pi)//2]
 pi_train = pi[:len(pi)//2]

From fd8629e78223c4a342b2752a1fe50dd72ebfbad2 Mon Sep 17 00:00:00 2001
From: Dmitry Kalinkin
Date: Wed, 18 Dec 2024 23:57:59 -0500
Subject: [PATCH 2/3] calo_pid: produce
 EcalEndcapNParticleIDOutput_probability_tensor

Append EcalEndcapNParticleIDOutput_probability_tensor to the
podio:output_collections list that calo_pid_recon passes to eicrecon.
---
 benchmarks/calo_pid/Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmarks/calo_pid/Snakefile b/benchmarks/calo_pid/Snakefile
index 31b0298..d9c48c9 100644
--- a/benchmarks/calo_pid/Snakefile
+++ b/benchmarks/calo_pid/Snakefile
@@ -60,7 +60,7 @@ rule calo_pid_recon:
 set -m # monitor mode to prevent lingering processes
 exec env DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \
   eicrecon {input} -Ppodio:output_file={output} \
-  -Ppodio:output_collections=MCParticles,EcalEndcapNRecHits,EcalEndcapNClusters,EcalEndcapNParticleIDInput_features,EcalEndcapNParticleIDTarget
+  -Ppodio:output_collections=MCParticles,EcalEndcapNRecHits,EcalEndcapNClusters,EcalEndcapNParticleIDInput_features,EcalEndcapNParticleIDTarget,EcalEndcapNParticleIDOutput_probability_tensor
 """
 
 

From 2924d820cfdd9c7ec1961c530b540e83b4fcc011 Mon Sep 17 00:00:00 2001
From: Dmitry Kalinkin
Date: Wed, 18 Dec 2024 23:58:15 -0500
Subject: [PATCH 3/3] calo_pid: savefig inferred probability difference

Write the histogram of the difference between clf.predict_proba() and
the stored probability tensor to proba_diff.pdf under output_dir
before it is shown.
---
 benchmarks/calo_pid/calo_pid.org | 1 +
 1 file changed, 1 insertion(+)

diff --git a/benchmarks/calo_pid/calo_pid.org b/benchmarks/calo_pid/calo_pid.org
index 965af14..c9ec2fa 100644
--- a/benchmarks/calo_pid/calo_pid.org
+++ b/benchmarks/calo_pid/calo_pid.org
@@ -363,6 +363,7 @@ if "_EcalEndcapNParticleIDOutput_probability_tensor_floatData" in pi_train.field
     eval_proba = ak.concatenate([pi_eval_proba, e_eval_proba])
 
     plt.hist(clf.predict_proba(eval_x.to_numpy())[:,1] - eval_proba[:,1].to_numpy())
+    plt.savefig(output_dir / "proba_diff.pdf", bbox_inches="tight")
     plt.show()
 else:
     print("EcalEndcapNParticleIDOutput not present")