From decb9917facbba20aef7ef7cd155efddb975980f Mon Sep 17 00:00:00 2001
From: Dmitry Kalinkin <dmitry.kalinkin@gmail.com>
Date: Tue, 19 Dec 2023 12:44:15 -0500
Subject: [PATCH] add benchmarks/backgrounds

---
 .gitlab-ci.yml                            |   1 +
 Snakefile                                 |  10 +
 benchmarks/backgrounds/Snakefile          |  85 +++++
 benchmarks/backgrounds/config.yml         |  22 ++
 benchmarks/backgrounds/ecal_backwards.org | 442 ++++++++++++++++++++++
 benchmarks/backgrounds/org2py.awk         |  22 ++
 6 files changed, 582 insertions(+)
 create mode 100644 benchmarks/backgrounds/Snakefile
 create mode 100644 benchmarks/backgrounds/config.yml
 create mode 100644 benchmarks/backgrounds/ecal_backwards.org
 create mode 100644 benchmarks/backgrounds/org2py.awk

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 66f794e0..1a0ffb5b 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -128,6 +128,7 @@ get_data:
       - runner_system_failure
 
 include: 
+  - local: 'benchmarks/backgrounds/config.yml'
   - local: 'benchmarks/tracking_detectors/config.yml'
   - local: 'benchmarks/barrel_ecal/config.yml'
   - local: 'benchmarks/barrel_hcal/config.yml'
diff --git a/Snakefile b/Snakefile
index 3bd2c69c..8cdca119 100644
--- a/Snakefile
+++ b/Snakefile
@@ -1 +1,11 @@
+include: "benchmarks/backgrounds/Snakefile"
 include: "benchmarks/barrel_ecal/Snakefile"
+
+rule matplotlibrc:
+    output:
+        ".matplotlibrc",
+    run:
+        with open(output[0], "wt") as fp:
+            fp.write("backend: Agg\n")
+            # interactive mode prevents plt.show() from blocking
+            fp.write("interactive : True\n")
diff --git a/benchmarks/backgrounds/Snakefile b/benchmarks/backgrounds/Snakefile
new file mode 100644
index 00000000..18abe77c
--- /dev/null
+++ b/benchmarks/backgrounds/Snakefile
@@ -0,0 +1,85 @@
+import os
+
+from snakemake.remote.S3 import RemoteProvider as S3RemoteProvider
+
+
+S3 = S3RemoteProvider(
+    endpoint_url="https://dtn01.sdcc.bnl.gov:9000",
+    access_key_id=os.environ["S3_ACCESS_KEY"],
+    secret_access_key=os.environ["S3_SECRET_KEY"],
+)
+
+
+rule backgrounds_get:
+    input:
+        S3.remote("eictest/EPIC/EVGEN/BACKGROUNDS/BEAMGAS/electron/beam_gas_ep_10GeV_foam_emin10keV_10Mevt_vtx.hepmc"),
+        S3.remote("eictest/EPIC/EVGEN/BACKGROUNDS/BEAMGAS/proton/ProtonBeamGasEvents/100GeV/100GeV_1.hepmc"),
+        S3.remote("eictest/EPIC/FULL/23.11.0/epic_craterlake/DIS/NC/10x100/minQ2=10/pythia8NCDIS_10x100_minQ2=10_beamEffects_xAngle=-0.025_hiDiv_1.0000.edm4hep.root"),
+    output:
+        "input/backgrounds/beam_gas_electron.hepmc",
+        "input/backgrounds/beam_gas_proton.hepmc",
+        dis_sim="input/backgrounds/pythia8NCDIS_10x100_minQ2=10_beamEffects_xAngle=-0.025_hiDiv_1.0000.edm4hep.root",
+    run:
+        for src, dst in zip(input, output):
+            os.link(src, dst)
+
+
+rule backgrounds_sim:
+    input:
+        "input/backgrounds/beam_gas_{BEAM}.hepmc",
+    output:
+        "sim/{DETECTOR_CONFIG}/beam_gas_{BEAM}.edm4hep.root",
+    log:
+        "sim/{DETECTOR_CONFIG}/beam_gas_{BEAM}.edm4hep.root.log",
+    params:
+        N_EVENTS=100
+    shell:
+        """
+ddsim \
+  --runType batch \
+  --part.minimalKineticEnergy 100*GeV  \
+  --filter.tracker edep0 \
+  -v WARNING \
+  --numberOfEvents {params.N_EVENTS} \
+  --compactFile $DETECTOR_PATH/{wildcards.DETECTOR_CONFIG}.xml \
+  --inputFiles {input} \
+  --outputFile {output}
+"""
+
+
+rule backgrounds_org2py:
+    input:
+        notebook=workflow.source_path("ecal_backwards.org"),
+        converter=workflow.source_path("./org2py.awk"),
+    output:
+        "ecal_backwards.py"
+    shell:
+        """
+awk -f {input.converter} {input.notebook} > {output}
+"""
+
+DETECTOR_CONFIG=os.environ["DETECTOR_CONFIG"]
+
+rule backgrounds_ecal_backwards:
+    input:
+        matplotlibrc=".matplotlibrc",
+        script="ecal_backwards.py",
+        electron_beam_gas_gen="input/backgrounds/beam_gas_electron.hepmc",
+        electron_beam_gas_sim="sim/" + DETECTOR_CONFIG + "/beam_gas_electron.edm4hep.root",
+        physics_signal_sim=rules.backgrounds_get.output.dis_sim,
+        proton_beam_gas_gen="input/backgrounds/beam_gas_proton.hepmc",
+        proton_beam_gas_sim="sim/" + DETECTOR_CONFIG + "/beam_gas_proton.edm4hep.root",
+    output:
+        directory("results/backgrounds/backwards_ecal")
+    shell:
+        """
+env \
+MATPLOTLIBRC={input.matplotlibrc} \
+ELECTRON_BEAM_GAS_GEN=$(realpath {input.electron_beam_gas_gen}) \
+ELECTRON_BEAM_GAS_SIM=$(realpath {input.electron_beam_gas_sim}) \
+PHYSICS_PROCESS_SIM=$(realpath {input.physics_signal_sim}) \
+PROTON_BEAM_GAS_GEN=$(realpath {input.proton_beam_gas_gen}) \
+PROTON_BEAM_GAS_SIM=$(realpath {input.proton_beam_gas_sim}) \
+OUTPUT_DIR={output} \
+python {input.script}
+"""
diff --git a/benchmarks/backgrounds/config.yml b/benchmarks/backgrounds/config.yml
new file mode 100644
index 00000000..513209a4
--- /dev/null
+++ b/benchmarks/backgrounds/config.yml
@@ -0,0 +1,22 @@
+sim:backgrounds:
+  extends: .det_benchmark
+  stage: simulate
+  script:
+    - snakemake --cores 1 backgrounds_get
+    - snakemake --cores 1 sim/$DETECTOR_CONFIG/beam_gas_{electron,proton}.edm4hep.root
+
+bench:backgrounds_emcal_backwards:
+  extends: .det_benchmark
+  stage: benchmarks
+  needs:
+    - ["sim:backgrounds"]
+  script:
+    - snakemake --cores 1 backgrounds_ecal_backwards
+
+collect_results:backgrounds:
+  extends: .det_benchmark
+  stage: collect
+  needs:
+    - "bench:backgrounds_emcal_backwards"
+  script:
+    - ls -lrht
diff --git a/benchmarks/backgrounds/ecal_backwards.org b/benchmarks/backgrounds/ecal_backwards.org
new file mode 100644
index 00000000..321d8081
--- /dev/null
+++ b/benchmarks/backgrounds/ecal_backwards.org
@@ -0,0 +1,442 @@
+#+PROPERTY: header-args:jupyter-python :session /jpy:localhost#8888:eeemcal :async yes :results drawer :exports both
+
+#+TITLE: ePIC EEEMCal background rates study
+#+AUTHOR: Dmitry Kalinkin
+#+OPTIONS: d:t
+
+#+begin_src jupyter-python :results silent
+import os
+from pathlib import Path
+
+import awkward as ak
+import boost_histogram as bh
+import dask
+import dask_awkward as dak
+import dask_histogram as dh
+import numpy as np
+import uproot
+import pyhepmc
+#+end_src
+
+#+begin_src jupyter-python :results silent
+from dask.distributed import Client
+
+client = Client(processes=False)
+#+end_src
+
+* Plotting setup
+
+#+begin_src jupyter-python :results silent
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+
+def setup_presentation_style():
+    mpl.rcParams.update(mpl.rcParamsDefault)
+    plt.style.use('ggplot')
+    plt.rcParams.update({
+        'axes.labelsize': 8,
+        'axes.titlesize': 9,
+        'figure.titlesize': 9,
+        'figure.figsize': (4, 3),
+        'legend.fontsize': 7,
+        'xtick.labelsize': 8,
+        'ytick.labelsize': 8,
+        'pgf.rcfonts': False,
+    })
+
+setup_presentation_style()
+#+end_src
+
+* Analysis
+
+** Input files
+
+#+begin_src jupyter-python :results silent
+ELECTRON_BEAM_GAS_GEN=os.environ.get("ELECTRON_BEAM_GAS_GEN", "../beam_gas_ep_10GeV_foam_emin10keV_10Mevt_vtx.hepmc")
+ELECTRON_BEAM_GAS_SIM=os.environ.get("ELECTRON_BEAM_GAS_SIM", "electron_beam_gas.edm4hep.root")
+PHYSICS_PROCESS_SIM=os.environ.get("PHYSICS_PROCESS_SIM", "pythia8NCDIS_10x100_minQ2=1_beamEffects_xAngle=-0.025_hiDiv_1.edm4hep.root")
+PROTON_BEAM_GAS_GEN=os.environ.get("PROTON_BEAM_GAS_GEN", "100GeV.hepmc")
+PROTON_BEAM_GAS_SIM=os.environ.get("PROTON_BEAM_GAS_SIM", "proton+beam_gas_ep.edm4hep.root")
+
+output_dir=Path(os.environ.get("OUTPUT_DIR", "./"))
+output_dir.mkdir(parents=True, exist_ok=True)
+#+end_src
+
+#+begin_src jupyter-python :results silent
+builder = ak.ArrayBuilder()
+n = 0
+with pyhepmc.open(PROTON_BEAM_GAS_GEN) as f:
+    it = iter(f)
+    for event in f:
+        builder.begin_list()
+        for vertex in event.vertices:
+            assert event.length_unit.name == "MM"
+            with builder.record("Vector4D"):
+                builder.field("x")
+                builder.real(vertex.position.x)
+                builder.field("y")
+                builder.real(vertex.position.y)
+                builder.field("z")
+                builder.real(vertex.position.z)
+                builder.field("t")
+                builder.real(vertex.position.t)
+        builder.end_list()
+        n += 1
+        if n > 10000: break
+
+vertices_proton_beam_gas = builder.snapshot()
+
+builder = ak.ArrayBuilder()
+n = 0
+with pyhepmc.open(ELECTRON_BEAM_GAS_GEN) as f:
+    it = iter(f)
+    for event in f:
+        builder.begin_list()
+        for vertex in event.vertices:
+            assert event.length_unit.name == "MM"
+            with builder.record("Vector3D"):
+                builder.field("x")
+                builder.real(vertex.position.x)
+                builder.field("y")
+                builder.real(vertex.position.y)
+                builder.field("z")
+                builder.real(vertex.position.z)
+                builder.field("t")
+                builder.real(vertex.position.t)
+        builder.end_list()
+        n += 1
+        if n > 10000: break
+
+vertices_electron_beam_gas = builder.snapshot()
+#+end_src
+
+#+begin_src jupyter-python :results silent
+def filter_name(name):
+    return "Hits." in name or "MCParticles." in name
+
+datasets = {
+    # https://wiki.bnl.gov/EPIC/index.php?title=Electron_Beam_Gas
+    "electron beam gas 10 GeV": {
+        "vertices": vertices_electron_beam_gas,
+        "events": uproot.dask({ELECTRON_BEAM_GAS_SIM: "events"}, filter_name=filter_name, open_files=False, steps_per_file=512),
+        "cmap": "cool",
+        "rate": 3.2e6, # Hz
+    },
+    "DIS 10x100, $Q^2 > 1$ GeV${}^2$": {
+        "events": uproot.dask({PHYSICS_PROCESS_SIM: "events"}, filter_name=filter_name, open_files=False, steps_per_file=512),
+        "rate": 184e3, # Hz
+    },
+    # https://wiki.bnl.gov/EPIC/index.php?title=Hadron_Beam_Gas
+    "proton beam gas 100 GeV": {
+        "vertices": vertices_proton_beam_gas,
+        "events": uproot.dask({PROTON_BEAM_GAS_SIM: "events"}, filter_name=filter_name, open_files=False, steps_per_file=512),
+        "cmap": "summer",
+        "rate": 31.9e3, # Hz
+    },
+}
+
+for ds in datasets.values():
+    ds["events"].eager_compute_divisions()
+#+end_src
+
+** Vertex distributions
+
+#+begin_src jupyter-python
+for label, ds in datasets.items():
+    if "vertices" not in ds: continue
+    vs = ds["vertices"]
+    weight = ds["rate"] / ak.num(ds["vertices"], axis=0)
+    plt.hist(ak.ravel(vs.t[:,0]), bins=100, histtype="step", label=label)
+plt.minorticks_on()
+plt.xlabel("vertex[0].t, mm")
+plt.legend()
+plt.savefig(output_dir / "vertex_time_distribution.pdf", bbox_inches="tight")
+plt.show()
+
+for label, ds in datasets.items():
+    if "vertices" not in ds: continue
+    vs = ds["vertices"]
+    weight = ds["rate"] / ak.num(ds["vertices"], axis=0)
+    plt.hist(ak.ravel(vs.z[:,0]), bins=100, histtype="step", label=label)
+plt.minorticks_on()
+plt.xlabel("vertex[0].z, mm")
+plt.legend()
+plt.savefig(output_dir / "vertex_z_distribution.pdf", bbox_inches="tight")
+plt.show()
+
+for label, ds in datasets.items():
+    if "vertices" not in ds: continue
+    vs = ds["vertices"]
+    cmap = ds["cmap"]
+    weight = ds["rate"] / ak.num(ds["vertices"], axis=0)
+    plt.hist2d(vs.z[:,0].to_numpy(), vs.x[:,0].to_numpy(), bins=(100, np.linspace(-130, 130, 160)), cmin=1e-30, label=label, cmap=cmap)
+    plt.plot([], color=mpl.colormaps[cmap](0.5), label=label)
+plt.minorticks_on()
+plt.xlabel("vertex[0].z, mm")
+plt.ylabel("vertex[0].x, mm")
+plt.legend()
+plt.savefig(output_dir / "vertex_xz_distribution.pdf", bbox_inches="tight")
+plt.show()
+
+for ix, (label, ds) in enumerate(datasets.items()):
+    if "vertices" not in ds: continue
+    vs = ds["vertices"]
+    cmap = ds["cmap"]
+    weight = ds["rate"] / ak.num(ds["vertices"], axis=0)
+    plt.hist2d(vs.z[:,0].to_numpy(), vs.y[:,0].to_numpy(), bins=(100, 100), cmin=1e-30, cmap=cmap)
+    plt.colorbar()
+    plt.minorticks_on()
+    plt.xlabel("vertex[0].z, mm")
+    plt.ylabel("vertex[0].y, mm")
+    plt.title(label)
+    plt.savefig(output_dir / f"vertex_yz_distribution_{ix}.pdf", bbox_inches="tight")
+    plt.show()
+#+end_src
+
+** Simulation results
+
+#+begin_src jupyter-python
+for collection_name in ["EcalEndcapNHits", "EcalEndcapPHits"]:
+    for dataset_ix, (label, ds) in enumerate(datasets.items()):
+        events = ds["events"]
+
+        energy_sums = ak.sum(events[f"{collection_name}.energy"].head(10000), axis=1)
+        event_id = ak.argmax(energy_sums)
+        xs = events[f"{collection_name}.position.x"].head(event_id + 1)[event_id].to_numpy()
+        ys = events[f"{collection_name}.position.y"].head(event_id + 1)[event_id].to_numpy()
+
+        bin_widths = [None, None]
+        for ix, vals in enumerate([xs, ys]):
+            centers = np.unique(vals)
+            diffs = centers[1:] - centers[:-1]
+            bin_widths[ix] = np.min(diffs[diffs > 0])
+            print(f"bin_widths[{ix}]", bin_widths[ix])
+
+        bins = {
+            "EcalEndcapNHits": [np.arange(-750., 750., bin_width) for bin_width in bin_widths],
+            "EcalEndcapPHits": [np.arange(-1800., 1800., bin_width) for bin_width in bin_widths],
+        }[collection_name]
+
+        plt.hist2d(
+            xs,
+            ys,
+            weights=events[f"{collection_name}.energy"].head(event_id + 1)[event_id].to_numpy(),
+            bins=bins,
+            cmin=1e-10,
+        )
+        plt.colorbar().set_label("energy, GeV", loc="top")
+        plt.title(f"{label}, event_id={event_id}\n{collection_name}")
+        plt.xlabel("hit x, mm", loc="right")
+        plt.ylabel("hit y, mm", loc="top")
+        plt.savefig(output_dir / f"{collection_name}_event_display_{dataset_ix}.pdf", bbox_inches="tight")
+        plt.show()
+#+end_src
+
+** Discovering number of cells
+
+Using HyperLogLog algorithm would be faster here, or actually load
+DD4hep geometry and count sensitive volumes.
+
+#+begin_src jupyter-python
+def unique(array):
+    if ak.backend(array) == "typetracer":
+        ak.typetracer.touch_data(array)
+        return array
+    return ak.from_numpy(np.unique(ak.to_numpy(ak.ravel(array))))
+unique_delayed = dask.delayed(unique)
+len_delayed = dask.delayed(len)
+
+cellID_for_r = dict()
+
+for collection_name in ["EcalEndcapNHits", "EcalEndcapPHits"]:
+    r_axis = {
+        "EcalEndcapNHits": bh.axis.Regular(75, 0., 750.),
+        "EcalEndcapPHits": bh.axis.Regular(90, 0., 1800.),
+    }[collection_name]
+    ds = datasets["DIS 10x100, $Q^2 > 1$ GeV${}^2$"]
+    events = ds["events"]
+
+    r = np.hypot(
+        ak.ravel(events[f"{collection_name}.position.x"]),
+        ak.ravel(events[f"{collection_name}.position.y"]),
+    )
+    cellID = ak.ravel(events[f"{collection_name}.cellID"])
+
+    cellID_for_r[collection_name] = np.array(client.gather(client.compute([
+        len_delayed(unique_delayed(
+            cellID[(r >= r_min) & (r < r_max)].map_partitions(unique)
+        ))
+        for r_min, r_max in zip(r_axis.edges[:-1], r_axis.edges[1:])
+    ])))
+
+    print(cellID_for_r[collection_name])
+    print(sum(cellID_for_r[collection_name]))
+
+    plt.stairs(
+        cellID_for_r[collection_name],
+        r_axis.edges,
+    )
+
+    plt.title(f"{collection_name}")
+    plt.legend()
+    plt.xlabel("r, mm", loc="right")
+    dr = (r_axis.edges[1] - r_axis.edges[0])
+    plt.ylabel(f"Number of towers per {dr} mm slice in $r$", loc="top")
+    plt.savefig(output_dir / f"{collection_name}_num_towers.pdf", bbox_inches="tight")
+    plt.show()
+#+end_src
+
+** Plotting the rates
+
+#+begin_src jupyter-python
+for collection_name in ["EcalEndcapNHits", "EcalEndcapPHits"]:
+    r_axis = {
+        "EcalEndcapNHits": bh.axis.Regular(75, 0., 750.),
+        "EcalEndcapPHits": bh.axis.Regular(90, 0., 1800.),
+    }[collection_name]
+    for edep_min in [0.005, 0.015, 0.050]: # GeV
+        for label, ds in datasets.items():
+            events = ds["events"]
+            weight = ds["rate"] / len(events)
+
+            r = np.hypot(
+                ak.ravel(events[f"{collection_name}.position.x"]),
+                ak.ravel(events[f"{collection_name}.position.y"]),
+            )
+            edep = ak.ravel(events[f"{collection_name}.energy"])
+            r = r[edep > edep_min]
+
+            hist = dh.factory(
+                r,
+                axes=(r_axis,),
+            ).compute()
+            plt.stairs(
+                hist.values() * weight / cellID_for_r[collection_name],
+                hist.axes[0].edges,
+                label=f"{label}",
+            )
+
+        plt.title(f"for $E_{{dep.}} >$ {edep_min * 1000} MeV\n{collection_name}")
+        plt.legend()
+        plt.xlabel("r, mm", loc="right")
+        plt.ylabel("rate per tower, Hz", loc="top")
+        plt.yscale("log")
+        plt.savefig(output_dir / f"{collection_name}_hit_rate_vs_r_edep_min_{edep_min:.3f}.pdf", bbox_inches="tight")
+        plt.show()
+#+end_src
+
+#+begin_src jupyter-python
+for collection_name in ["EcalEndcapNHits", "EcalEndcapPHits"]:
+    for totedep_min in [-1, 0, 0.1, 0.5, 1.0, 5.0, 10.]: # GeV
+        for label, ds in datasets.items():
+            events = ds["events"]
+            weight = ds["rate"] / len(events)
+
+            z = ds["events"]["MCParticles.vertex.z"][:,1]
+            totedep = ak.sum(events[f"{collection_name}.energy"], axis=1)
+            z = z[totedep > totedep_min]
+
+            hist = dh.factory(
+                z,
+                axes=(bh.axis.Regular(250, -7500., 17500.),),
+            ).compute()
+            plt.stairs(
+                hist.values() * weight,
+                hist.axes[0].edges,
+                label=f"{label}",
+            )
+
+        plt.title(rf"for events with $E_{{\mathrm{{dep. tot.}}}}$ $>$ {totedep_min} GeV" + f"\n{collection_name}")
+        plt.legend()
+        plt.xlabel("$z$ of the first interaction vertex, mm", loc="right")
+        plt.ylabel("rate, Hz", loc="top")
+        plt.yscale("log")
+        plt.savefig(output_dir / f"{collection_name}_hit_rate_vs_z_totedep_min_{totedep_min:.1f}.pdf", bbox_inches="tight")
+        plt.show()
+#+end_src
+
+#+begin_src jupyter-python
+num_towers_cache = {
+    "LumiSpecCAL": 200,
+    "LumiDirectPCAL": 1,
+    "ZDCHcal": 1470,
+    "LFHCAL": 578338,
+    "ZDC_WSi_": 187043,
+    "EcalBarrelScFi": 124205483,
+    "EcalEndcapP": 15037,
+    "ZDCEcal": 400,
+    "EcalEndcapPInsert": 536,
+    "HcalEndcapPInsert": 20251,
+    "B0ECal": 131,
+    "HcalEndcapN": 13800,
+    "HcalBarrel": 7680,
+    "EcalBarrelImaging": 5765469,
+    "EcalEndcapN": 2988,
+    "ZDC_PbSi_": 44344,
+}
+
+fig_cmb = plt.figure()
+ax_cmb = fig_cmb.gca()
+
+for edep_min in [0]: # GeV
+    for dataset_ix, (x_offset, (ds_label, ds)) in enumerate(zip(np.linspace(-0.3, 0.3, len(datasets)), datasets.items())):
+        events = ds["events"]
+        weight = ds["rate"] / len(events)
+
+        labels = []
+        values = []
+        norms = []
+
+        for branch_name in events.fields:
+            if ".energy" not in branch_name: continue
+            if "ZDC_SiliconPix_Hits" in branch_name: continue
+
+            edep = ak.ravel(events[branch_name])
+
+            #cellID = ak.ravel(events[branch_name.replace(".energy", ".cellID")])
+            #num_towers = len(unique_delayed(
+            #    cellID.map_partitions(unique)
+            #).compute())
+
+            num_towers = num_towers_cache[branch_name.replace("Hits.energy", "")]
+
+            labels.append(branch_name.replace("Hits.energy", ""))
+            values.append(ak.count(edep[edep > edep_min]))
+            norms.append(num_towers if num_towers != 0 else np.nan)
+
+        fig_cur = plt.figure()
+        ax_cur = fig_cur.gca()
+
+        values, = dask.compute(values)
+        for ax, per_tower, offset, width in [
+                (ax_cmb, True, x_offset, 2 * 0.3 / (len(datasets) - 1)),
+                (ax_cur, False, 0, 2 * 0.3),
+        ]:
+            ax.bar(
+                np.arange(len(labels)) + offset,
+                weight * np.array(values) / (np.array(norms) if per_tower else np.ones_like(norms)),
+                width=width,
+                tick_label=labels,
+                label=ds_label,
+                color=f"C{dataset_ix}",
+            )
+
+        plt.sca(ax_cur)
+        plt.legend()
+        plt.title(f"for $E_{{dep.}} >$ {edep_min * 1000} MeV")
+        plt.ylabel("rate, Hz", loc="top")
+        plt.yscale("log")
+        plt.xticks(rotation=90, ha='right')
+        fig_cur.savefig(f"rates_edep_min_{edep_min}_{dataset_ix}.pdf", bbox_inches="tight")
+        plt.show()
+        plt.close(fig_cur)
+
+    plt.sca(ax_cmb)
+    plt.legend()
+    plt.title(f"for $E_{{dep.}} >$ {edep_min * 1000} MeV")
+    plt.ylabel("rate per tower, Hz", loc="top")
+    plt.yscale("log")
+    plt.xticks(rotation=90, ha='right')
+    fig_cmb.savefig(f"rates_edep_min_{edep_min}.pdf", bbox_inches="tight")
+    plt.show()
+#+end_src
diff --git a/benchmarks/backgrounds/org2py.awk b/benchmarks/backgrounds/org2py.awk
new file mode 100644
index 00000000..0d23cf25
--- /dev/null
+++ b/benchmarks/backgrounds/org2py.awk
@@ -0,0 +1,22 @@
+BEGIN {
+	in_src = 0
+	IGNORECASE=1
+}
+
+/^#\+begin_src\s+[^\s]*python/ {
+	in_src = 1
+	match($0, /^ */)
+	spaces = RLENGTH
+	next
+}
+
+/^#\+end_src/ {
+	in_src = 0
+	next
+}
+
+in_src {
+	re = "^ {" spaces "}"
+	gsub(re,"")
+	print
+}