From 7fc3dbd76b122dc5e70bb80cac55418970d84210 Mon Sep 17 00:00:00 2001 From: Dacheng Xu Date: Wed, 18 Dec 2024 15:39:35 -0500 Subject: [PATCH 01/10] Move `compute_center_times` from straxen to strax (#1501) * Move `compute_center_times` from straxen to strax * Update version * Debug --- straxen/plugins/peaks/peak_basics_vanilla.py | 18 +++--------------- tests/test_peaks.py | 3 +++ 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/straxen/plugins/peaks/peak_basics_vanilla.py b/straxen/plugins/peaks/peak_basics_vanilla.py index 391a2af6b..329d9d25f 100644 --- a/straxen/plugins/peaks/peak_basics_vanilla.py +++ b/straxen/plugins/peaks/peak_basics_vanilla.py @@ -1,5 +1,4 @@ import numpy as np -import numba import strax import straxen @@ -15,7 +14,7 @@ class PeakBasicsVanilla(strax.Plugin): """ - __version__ = "0.1.4" + __version__ = "0.1.5" depends_on = "peaks" provides = "peak_basics" @@ -103,21 +102,10 @@ def compute(self, peaks): if self.check_peak_sum_area_rtol is not None: self.check_area(area_total, p, self.check_peak_sum_area_rtol) # Negative or zero-area peaks have centertime at startime - r["center_time"] = p["time"] - r["center_time"][m] += self.compute_center_times(peaks[m]) + r["center_time"][~m] = p["time"][~m] + r["center_time"][m] = strax.compute_center_time(p[m]) return r - @staticmethod - @numba.njit(cache=True, nogil=True) - def compute_center_times(peaks): - result = np.zeros(len(peaks), dtype=np.int32) - for p_i, p in enumerate(peaks): - t = 0 - for t_i, weight in enumerate(p["data"]): - t += t_i * p["dt"] * weight - result[p_i] = t / p["area"] - return result - @staticmethod def check_area(area_per_channel_sum, peaks, rtol) -> None: """Check if the area of the sum-wf is the same as the total area (if the area of the peak is diff --git a/tests/test_peaks.py b/tests/test_peaks.py index fd525ecf5..f5fb329e4 100644 --- a/tests/test_peaks.py +++ b/tests/test_peaks.py @@ -46,6 +46,9 @@ def test_aft_equals1(self, test_peak_idx): test_data = self.get_test_peaks() test_data[test_peak_idx]["area_per_channel"][: self.n_top] = 1 test_data[test_peak_idx]["area"] = np.sum(test_data[test_peak_idx]["area_per_channel"]) + test_data[test_peak_idx]["data"][: test_data[test_peak_idx]["length"]] = ( + test_data[test_peak_idx]["area"] / test_data[test_peak_idx]["length"] + ) peaks = self.peaks_basics.compute(test_data) assert peaks[test_peak_idx]["area_fraction_top"] == 1 From 4d2c89e906e3bcd46c9702ff6700675ddf455bf3 Mon Sep 17 00:00:00 2001 From: Dacheng Xu Date: Wed, 18 Dec 2024 16:55:35 -0500 Subject: [PATCH 02/10] Use numpy and strax native dtypes, not `" 0 # Only label the selection diff --git a/straxen/plugins/afterpulses/afterpulse_processing.py b/straxen/plugins/afterpulses/afterpulse_processing.py index e6dfef058..fdba90a7a 100644 --- a/straxen/plugins/afterpulses/afterpulse_processing.py +++ b/straxen/plugins/afterpulses/afterpulse_processing.py @@ -412,45 +412,41 @@ def dtype_afterpulses(): - The afterpulse datatype """ - dtype_ap = [ - (("Channel/PMT number", "channel"), " Date: Thu, 19 Dec 2024 23:36:18 -0500 Subject: [PATCH 03/10] Inherit `area_fraction_top`, `center_time` and `median_time` from peaklets (#1503) * Inherit `area_fraction_top`, `center_time` and `median_time` from peaklets * Debug * Minor change * Add comment * Debug * Reform * Debug * Debug --- .gitignore | 3 ++ .../plugins/events/event_basics_vanilla.py | 3 +- .../plugins/events/event_top_bottom_params.py | 13 ++++---- .../plugins/events_nv/event_waveform_nv.py | 8 ++--- straxen/plugins/merged_s2s/merged_s2s.py | 17 +++++----- .../peaklet_classification_vanilla.py | 8 +---- straxen/plugins/peaklets/peaklets.py | 27 ++++++++++------ straxen/plugins/peaks/peak_basics_vanilla.py | 32 ++++--------------- .../plugins/peaks/peak_top_bottom_params.py | 2 +- tests/test_peaklet_processing.py | 1 + tests/test_peaks.py | 15 --------- 11 files changed, 52 insertions(+), 77 deletions(-) diff --git a/.gitignore b/.gitignore index 9af983f30..f505aa121 100644 --- a/.gitignore +++ b/.gitignore @@ -74,3 +74,6 @@ docs/source/reference/data_kinds* docs/source/reference/release_notes.rst test_dict.json + +*.pkl +*.pkl.gz diff --git a/straxen/plugins/events/event_basics_vanilla.py b/straxen/plugins/events/event_basics_vanilla.py index 4ec1278cf..86510c88e 100644 --- a/straxen/plugins/events/event_basics_vanilla.py +++ b/straxen/plugins/events/event_basics_vanilla.py @@ -114,7 +114,8 @@ def _set_dtype_requirements(self): # Properties to store for each peak (main and alternate S1 and S2) self.peak_properties = ( ("time", np.int64, "start time since unix epoch [ns]"), - ("center_time", np.int64, "weighted center time since unix epoch [ns]"), + ("center_time", np.int64, "weighted average center time since unix epoch [ns]"), + ("median_time", np.float32, "weighted relative median time of the peak [ns]"), ("endtime", np.int64, "end time since unix epoch [ns]"), ("area", np.float32, "area, uncorrected [PE]"), ("n_channels", np.int16, "count of contributing PMTs"), diff --git a/straxen/plugins/events/event_top_bottom_params.py b/straxen/plugins/events/event_top_bottom_params.py index 05aef18ef..ee3ae8694 100644 --- a/straxen/plugins/events/event_top_bottom_params.py +++ b/straxen/plugins/events/event_top_bottom_params.py @@ -91,6 +91,9 @@ def infer_dtype(self): def compute(self, events): result = np.zeros(events.shape, dtype=self.dtype) + if not len(events): + return result + result["time"], result["endtime"] = events["time"], strax.endtime(events) peak_dtype = strax.peak_dtype(n_channels=straxen.n_tpc_pmts, store_data_top=False) for type_ in self.ptypes: @@ -129,15 +132,13 @@ def compute(self, events): result[f"{type_}_center_time_{arr_}"][mask] += recalc_ctime[mask].astype(int) # computing widths ## # zero or undefined area peaks should have nans - strax.compute_widths(fpeaks_) + _, width, area_decile_from_midpoint = strax.compute_widths(fpeaks_) result[f"{type_}_rise_time_{arr_}"][:] = np.nan - result[f"{type_}_rise_time_{arr_}"][mask] = -fpeaks_["area_decile_from_midpoint"][ - mask - ][:, 1] + result[f"{type_}_rise_time_{arr_}"][mask] = -area_decile_from_midpoint[mask][:, 1] result[f"{type_}_range_50p_area_{arr_}"][:] = np.nan - result[f"{type_}_range_50p_area_{arr_}"][mask] = fpeaks_["width"][mask][:, 5] + result[f"{type_}_range_50p_area_{arr_}"][mask] = width[mask][:, 5] result[f"{type_}_range_90p_area_{arr_}"][:] = np.nan - result[f"{type_}_range_90p_area_{arr_}"][mask] = fpeaks_["width"][mask][:, 9] + result[f"{type_}_range_90p_area_{arr_}"][mask] = width[mask][:, 9] # Difference between center times of top and bottom arrays result[f"{type_}_center_time_diff_top_bot"] = ( result[f"{type_}_center_time_top"] - result[f"{type_}_center_time_bot"] diff --git a/straxen/plugins/events_nv/event_waveform_nv.py b/straxen/plugins/events_nv/event_waveform_nv.py index 736a27177..6f3d9eb57 100644 --- a/straxen/plugins/events_nv/event_waveform_nv.py +++ b/straxen/plugins/events_nv/event_waveform_nv.py @@ -52,7 +52,7 @@ def compute(self, events_nv, records_nv, start, end): _tmp_events["length"] = (events_nv["endtime"] - events_nv["time"]) // 2 _tmp_events["dt"] = 2 strax.simple_summed_waveform(records_nv, _tmp_events, self.to_pe) - strax.compute_widths(_tmp_events) + strax.compute_properties(_tmp_events) strax.copy_to_buffer(_tmp_events, events_waveform, "_temp_nv_evts_cpy") events_waveform["range_50p_area"] = _tmp_events["width"][:, 5] @@ -66,9 +66,7 @@ def compute(self, events_nv, records_nv, start, end): def veto_event_waveform_dtype( n_samples_wf: int = 200, ) -> list: - dtype = [] - dtype += strax.time_dt_fields # because mutable - dtype += [ + dtype = strax.time_dt_fields + [ (("Waveform data in PE/sample (not PE/ns!)", "data"), np.float32, n_samples_wf), (("Width (in ns) of the central 50% area of the peak", "range_50p_area"), np.float32), (("Width (in ns) of the central 90% area of the peak", "range_90p_area"), np.float32), @@ -100,6 +98,8 @@ def _temp_event_data_type(n_samples_wf: int = 150, n_widths: int = 11) -> list: np.float32, n_samples_wf, ), + (("Weighted average center time of the peak [ns]", "center_time"), np.int64), + (("Weighted relative median time of the peak [ns]", "median_time"), np.float32), (("Peak widths in range of central area fraction [ns]", "width"), np.float32, n_widths), ( ("Peak widths: time between nth and 5th area decile [ns]", "area_decile_from_midpoint"), diff --git a/straxen/plugins/merged_s2s/merged_s2s.py b/straxen/plugins/merged_s2s/merged_s2s.py index 8cdd1a43e..74a614cb5 100644 --- a/straxen/plugins/merged_s2s/merged_s2s.py +++ b/straxen/plugins/merged_s2s/merged_s2s.py @@ -20,6 +20,10 @@ class MergedS2s(strax.OverlapWindowPlugin): data_kind = "merged_s2s" provides = "merged_s2s" + n_tpc_pmts = straxen.URLConfig(type=int, help="Number of TPC PMTs") + + n_top_pmts = straxen.URLConfig(type=int, help="Number of top TPC array PMTs") + s2_merge_max_duration = straxen.URLConfig( default=50_000, infer_type=False, @@ -52,10 +56,6 @@ class MergedS2s(strax.OverlapWindowPlugin): ), ) - n_top_pmts = straxen.URLConfig(type=int, help="Number of top TPC array PMTs") - - n_tpc_pmts = straxen.URLConfig(type=int, help="Number of TPC PMTs") - merged_s2s_get_window_size_factor = straxen.URLConfig( default=5, type=int, track=False, help="Factor of the window size for the merged_s2s plugin" ) @@ -141,19 +141,20 @@ def compute(self, peaklets, lone_hits): lh["length"] = lh["right_integration"] - lh["left_integration"] lh = strax.sort_by_time(lh) - _n_top_pmts = self.n_top_pmts if "data_top" in self.dtype.names else -1 + _store_data_top = "data_top" in self.dtype.names _store_data_start = "data_start" in self.dtype.names strax.add_lone_hits( merged_s2s, lh, self.to_pe, - n_top_channels=_n_top_pmts, + n_top_channels=self.n_top_pmts, + store_data_top=_store_data_top, store_data_start=_store_data_start, ) - strax.compute_widths(merged_s2s) + strax.compute_properties(merged_s2s, n_top_channels=self.n_top_pmts) - if (_n_top_pmts <= 0) or (not _store_data_start): + if (not _store_data_top) or (not _store_data_start): merged_s2s = drop_data_field(merged_s2s, self.dtype, "_drop_data_field_merged_s2s") return merged_s2s diff --git a/straxen/plugins/peaklets/peaklet_classification_vanilla.py b/straxen/plugins/peaklets/peaklet_classification_vanilla.py index 8f74b3546..ce297a325 100644 --- a/straxen/plugins/peaklets/peaklet_classification_vanilla.py +++ b/straxen/plugins/peaklets/peaklet_classification_vanilla.py @@ -42,8 +42,6 @@ class PeakletClassificationVanilla(strax.Plugin): ), ) - n_top_pmts = straxen.URLConfig(default=straxen.n_top_pmts, type=int, help="Number of top PMTs") - s1_max_rise_time_post100 = straxen.URLConfig( default=200, type=(int, float), help="Maximum S1 rise time for > 100 PE [ns]" ) @@ -75,10 +73,6 @@ def compute(self, peaklets): # Properties needed for classification: rise_time = -peaklets["area_decile_from_midpoint"][:, 1] n_channels = (peaklets["area_per_channel"] > 0).sum(axis=1) - n_top = self.n_top_pmts - area_top = peaklets["area_per_channel"][:, :n_top].sum(axis=1) - area_total = peaklets["area_per_channel"].sum(axis=1) - area_fraction_top = area_top / area_total is_large_s1 = peaklets["area"] >= 100 is_large_s1 &= rise_time <= self.s1_max_rise_time_post100 @@ -91,7 +85,7 @@ def compute(self, peaklets): ) is_small_s1 &= rise_time < self.upper_rise_time_aft_boundary( - area_fraction_top, + peaklets["area_fraction_top"], *self.s1_risetime_aft_parameters, *self.s1_flatten_threshold_aft, ) diff --git a/straxen/plugins/peaklets/peaklets.py b/straxen/plugins/peaklets/peaklets.py index 1b056ae55..ecd1f006d 100644 --- a/straxen/plugins/peaklets/peaklets.py +++ b/straxen/plugins/peaklets/peaklets.py @@ -260,19 +260,18 @@ def compute(self, records, start, end): self.clip_peaklet_times(hitlets, start, end) rlinks = strax.record_links(records) - # If store_data_top is false, don't digitize the top array - _n_top_pmts = self.n_top_pmts if self.store_data_top else -1 strax.sum_waveform( peaklets, hitlets, records, rlinks, self.to_pe, - n_top_channels=_n_top_pmts, + n_top_channels=self.n_top_pmts, + store_data_top=self.store_data_top, store_data_start=self.store_data_start, ) - strax.compute_widths(peaklets) + strax.compute_properties(peaklets, n_top_channels=self.n_top_pmts) # Split peaks using low-split natural breaks; # see https://github.com/XENONnT/straxen/pull/45 @@ -289,7 +288,8 @@ def compute(self, records, start, end): filter_wing_width=self.peak_split_filter_wing_width, min_area=self.peak_split_min_area, do_iterations=self.peak_split_iterations, - n_top_channels=_n_top_pmts, + n_top_channels=self.n_top_pmts, + store_data_top=self.store_data_top, store_data_start=self.store_data_start, ) @@ -306,12 +306,15 @@ def compute(self, records, start, end): self.to_pe, reference_length=self.saturation_reference_length, min_reference_length=self.saturation_min_reference_length, - n_top_channels=_n_top_pmts, + n_top_channels=self.n_top_pmts, + store_data_top=self.store_data_top, store_data_start=self.store_data_start, ) # Compute the width again for corrected peaks - strax.compute_widths(peaklets, select_peaks_indices=peak_list) + strax.compute_properties( + peaklets, n_top_channels=self.n_top_pmts, select_peaks_indices=peak_list + ) # Compute tight coincidence level. # Making this a separate plugin would @@ -347,7 +350,7 @@ def compute(self, records, start, end): peaklets["n_hits"] = counts # Drop the data_top or data_start field - if (_n_top_pmts <= 0) or (not self.store_data_start): + if (not self.store_data_top) or (not self.store_data_start): peaklets = drop_data_field(peaklets, self.dtype_for("peaklets")) # Check channel of peaklets @@ -441,6 +444,7 @@ def peak_saturation_correction( min_reference_length=20, use_classification=False, n_top_channels=0, + store_data_top=False, store_data_start=False, ): """Correct the area and per pmt area of peaks from saturation. @@ -456,6 +460,8 @@ def peak_saturation_correction( samples :param use_classification: Option of using classification to pick only S2 :param n_top_channels: Number of top array channels. + :param store_data_top: Boolean which indicates whether to store the top array waveform in the + peak. :param store_data_start: Boolean which indicates whether to store the first samples of the waveform in the peak. @@ -537,8 +543,9 @@ def peak_saturation_correction( records, rlinks, to_pe, - n_top_channels, - store_data_start, + n_top_channels=n_top_channels, + store_data_top=store_data_top, + store_data_start=store_data_start, select_peaks_indices=peak_list, ) return peak_list diff --git a/straxen/plugins/peaks/peak_basics_vanilla.py b/straxen/plugins/peaks/peak_basics_vanilla.py index 67006d1f9..6c16908d2 100644 --- a/straxen/plugins/peaks/peak_basics_vanilla.py +++ b/straxen/plugins/peaks/peak_basics_vanilla.py @@ -18,10 +18,6 @@ class PeakBasicsVanilla(strax.Plugin): depends_on = "peaks" provides = "peak_basics" - n_top_pmts = straxen.URLConfig( - default=straxen.n_top_pmts, infer_type=False, help="Number of top PMTs" - ) - check_peak_sum_area_rtol = straxen.URLConfig( default=None, track=False, @@ -36,7 +32,7 @@ class PeakBasicsVanilla(strax.Plugin): def infer_dtype(self): dtype = strax.time_fields + [ - (("Weighted center time of the peak [ns]", "center_time"), np.int64), + (("Weighted average center time of the peak [ns]", "center_time"), np.int64), (("Peak integral in PE", "area"), np.float32), (("Number of hits contributing at least one sample to the peak", "n_hits"), np.int32), (("Number of PMTs contributing to the peak", "n_channels"), np.int16), @@ -45,15 +41,10 @@ def infer_dtype(self): (("Total number of saturated channels", "n_saturated_channels"), np.int16), (("Width (in ns) of the central 50% area of the peak", "range_50p_area"), np.float32), (("Width (in ns) of the central 90% area of the peak", "range_90p_area"), np.float32), - ( - ( - "Fraction of area seen by the top array (NaN for peaks with non-positive area)", - "area_fraction_top", - ), - np.float32, - ), + (("Fraction of area seen by the top array", "area_fraction_top"), np.float32), (("Length of the peak waveform in samples", "length"), np.int32), (("Time resolution of the peak waveform in ns", "dt"), np.int16), + (("Weighted relative median time of the peak [ns]", "median_time"), np.float32), (("Time between 10% and 50% area quantiles [ns]", "rise_time"), np.float32), ( ("Number of PMTs with hits within tight range of mean", "tight_coincidence"), @@ -74,7 +65,9 @@ def infer_dtype(self): def compute(self, peaks): p = peaks r = np.zeros(len(p), self.dtype) - needed_fields = "time length dt area type max_diff min_diff" + needed_fields = ( + "time center_time length dt median_time area area_fraction_top type max_diff min_diff" + ) for q in needed_fields.split(): r[q] = p[q] r["endtime"] = p["time"] + p["dt"] * p["length"] @@ -86,22 +79,11 @@ def compute(self, peaks): r["max_pmt_area"] = np.max(p["area_per_channel"], axis=1) r["tight_coincidence"] = p["tight_coincidence"] r["n_saturated_channels"] = p["n_saturated_channels"] - - n_top = self.n_top_pmts - area_top = p["area_per_channel"][:, :n_top].sum(axis=1) - # Recalculate to prevent numerical inaccuracy #442 - area_total = p["area_per_channel"].sum(axis=1) - # Negative-area peaks get NaN AFT - m = p["area"] > 0 - r["area_fraction_top"][m] = area_top[m] / area_total[m] - r["area_fraction_top"][~m] = np.nan r["rise_time"] = -p["area_decile_from_midpoint"][:, 1] if self.check_peak_sum_area_rtol is not None: + area_total = p["area_per_channel"].sum(axis=1) self.check_area(area_total, p, self.check_peak_sum_area_rtol) - # Negative or zero-area peaks have centertime at startime - r["center_time"][~m] = p["time"][~m] - r["center_time"][m] = strax.compute_center_time(p[m]) return r @staticmethod diff --git a/straxen/plugins/peaks/peak_top_bottom_params.py b/straxen/plugins/peaks/peak_top_bottom_params.py index 8ed959807..36abf4788 100644 --- a/straxen/plugins/peaks/peak_top_bottom_params.py +++ b/straxen/plugins/peaks/peak_top_bottom_params.py @@ -90,7 +90,7 @@ def compute(self, peaks): result[f"center_time_{arr_}"] = peaks["time"] result[f"center_time_{arr_}"][mask] += recalc_ctime[mask].astype(int) # computing widths times - strax.compute_widths(fpeaks_) + strax.compute_properties(fpeaks_) result[f"rise_time_{arr_}"][:] = np.nan result[f"rise_time_{arr_}"][mask] = -fpeaks_["area_decile_from_midpoint"][mask][:, 1] result[f"range_50p_area_{arr_}"][:] = np.nan diff --git a/tests/test_peaklet_processing.py b/tests/test_peaklet_processing.py index 39618f5e2..a94ec7a1e 100644 --- a/tests/test_peaklet_processing.py +++ b/tests/test_peaklet_processing.py @@ -18,6 +18,7 @@ def get_filled_peaks(peak_length, data_length, n_widths): ] if n_widths is not None: dtype += [ + (("median_time of p", "median_time"), np.float64), (("width of p", "width"), (np.float64, n_widths)), ( ("area_decile_from_midpoint of p", "area_decile_from_midpoint"), diff --git a/tests/test_peaks.py b/tests/test_peaks.py index f5fb329e4..7911a23d0 100644 --- a/tests/test_peaks.py +++ b/tests/test_peaks.py @@ -37,21 +37,6 @@ def setUp(self): ) ) - @settings(deadline=None) - @given( - strategies.integers(min_value=0, max_value=TEST_DATA_LENGTH - 1), - ) - def test_aft_equals1(self, test_peak_idx): - """Fill top array with area 1.""" - test_data = self.get_test_peaks() - test_data[test_peak_idx]["area_per_channel"][: self.n_top] = 1 - test_data[test_peak_idx]["area"] = np.sum(test_data[test_peak_idx]["area_per_channel"]) - test_data[test_peak_idx]["data"][: test_data[test_peak_idx]["length"]] = ( - test_data[test_peak_idx]["area"] / test_data[test_peak_idx]["length"] - ) - peaks = self.peaks_basics.compute(test_data) - assert peaks[test_peak_idx]["area_fraction_top"] == 1 - @settings(deadline=None) @given( strategies.floats( From 3a310e4414ed8a83fed2820a00f158b4700584f2 Mon Sep 17 00:00:00 2001 From: Dacheng Xu Date: Fri, 20 Dec 2024 00:54:33 -0500 Subject: [PATCH 04/10] Bump version of changed plugins in #1503 (#1504) --- straxen/plugins/events/event_basics_vanilla.py | 2 +- straxen/plugins/events/event_top_bottom_params.py | 2 +- straxen/plugins/events_nv/event_waveform_nv.py | 2 +- straxen/plugins/merged_s2s/merged_s2s.py | 2 +- straxen/plugins/peaklets/peaklet_classification_vanilla.py | 2 +- straxen/plugins/peaklets/peaklets.py | 2 +- straxen/plugins/peaks/peak_basics_vanilla.py | 2 +- straxen/plugins/peaks/peak_top_bottom_params.py | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/straxen/plugins/events/event_basics_vanilla.py b/straxen/plugins/events/event_basics_vanilla.py index 86510c88e..fb3842a52 100644 --- a/straxen/plugins/events/event_basics_vanilla.py +++ b/straxen/plugins/events/event_basics_vanilla.py @@ -17,7 +17,7 @@ class EventBasicsVanilla(strax.Plugin): """ - __version__ = "1.3.3" + __version__ = "1.3.4" depends_on = ("events", "peak_basics", "peak_positions", "peak_proximity") provides = "event_basics" diff --git a/straxen/plugins/events/event_top_bottom_params.py b/straxen/plugins/events/event_top_bottom_params.py index ee3ae8694..7f63f83ba 100644 --- a/straxen/plugins/events/event_top_bottom_params.py +++ b/straxen/plugins/events/event_top_bottom_params.py @@ -12,7 +12,7 @@ class EventTopBottomParams(strax.Plugin): depends_on = ("event_info", "event_waveform") provides = "event_top_bottom_params" - __version__ = "0.0.0" + __version__ = "0.0.1" def infer_dtype(self): # Populating data type information diff --git a/straxen/plugins/events_nv/event_waveform_nv.py b/straxen/plugins/events_nv/event_waveform_nv.py index 6f3d9eb57..9bc8b1afd 100644 --- a/straxen/plugins/events_nv/event_waveform_nv.py +++ b/straxen/plugins/events_nv/event_waveform_nv.py @@ -11,7 +11,7 @@ class nVETOEventWaveform(strax.Plugin): """Plugin which computes the summed waveform as well as some shape properties of the NV events.""" - __version__ = "0.0.1" + __version__ = "0.0.2" depends_on = "events_nv", "records_nv" provides = "event_waveform_nv" diff --git a/straxen/plugins/merged_s2s/merged_s2s.py b/straxen/plugins/merged_s2s/merged_s2s.py index 74a614cb5..07778e18f 100644 --- a/straxen/plugins/merged_s2s/merged_s2s.py +++ b/straxen/plugins/merged_s2s/merged_s2s.py @@ -14,7 +14,7 @@ class MergedS2s(strax.OverlapWindowPlugin): """Merge together peaklets if peak finding favours that they would form a single peak instead.""" - __version__ = "1.1.0" + __version__ = "1.1.1" depends_on: Tuple[str, ...] = ("peaklets", "peaklet_classification", "lone_hits") data_kind = "merged_s2s" diff --git a/straxen/plugins/peaklets/peaklet_classification_vanilla.py b/straxen/plugins/peaklets/peaklet_classification_vanilla.py index ce297a325..b024edb84 100644 --- a/straxen/plugins/peaklets/peaklet_classification_vanilla.py +++ b/straxen/plugins/peaklets/peaklet_classification_vanilla.py @@ -12,7 +12,7 @@ class PeakletClassificationVanilla(strax.Plugin): """Classify peaklets as unknown, S1, or S2.""" - __version__ = "3.0.3" + __version__ = "3.0.4" depends_on = "peaklets" provides: Union[str, tuple] = "peaklet_classification" diff --git a/straxen/plugins/peaklets/peaklets.py b/straxen/plugins/peaklets/peaklets.py index ecd1f006d..271d07af6 100644 --- a/straxen/plugins/peaklets/peaklets.py +++ b/straxen/plugins/peaklets/peaklets.py @@ -39,7 +39,7 @@ class Peaklets(strax.Plugin): parallel = "process" compressor = "zstd" - __version__ = "1.2.0" + __version__ = "1.2.1" peaklet_gap_threshold = straxen.URLConfig( default=700, infer_type=False, help="No hits for this many ns triggers a new peak" diff --git a/straxen/plugins/peaks/peak_basics_vanilla.py b/straxen/plugins/peaks/peak_basics_vanilla.py index 6c16908d2..42c82067f 100644 --- a/straxen/plugins/peaks/peak_basics_vanilla.py +++ b/straxen/plugins/peaks/peak_basics_vanilla.py @@ -14,7 +14,7 @@ class PeakBasicsVanilla(strax.Plugin): """ - __version__ = "0.1.5" + __version__ = "0.1.6" depends_on = "peaks" provides = "peak_basics" diff --git a/straxen/plugins/peaks/peak_top_bottom_params.py b/straxen/plugins/peaks/peak_top_bottom_params.py index 36abf4788..b64d4cb8a 100644 --- a/straxen/plugins/peaks/peak_top_bottom_params.py +++ b/straxen/plugins/peaks/peak_top_bottom_params.py @@ -11,7 +11,7 @@ class PeakTopBottomParams(strax.Plugin): depends_on = ("peaks", "peak_basics") provides = "peak_top_bottom_params" - __version__ = "0.0.0" + __version__ = "0.0.1" def infer_dtype(self): dtype = [] From 57e0f336d863b79ad80eebad9d861f14c0f580a6 Mon Sep 17 00:00:00 2001 From: Dacheng Xu Date: Mon, 23 Dec 2024 19:09:17 -0500 Subject: [PATCH 05/10] Clean unnecessary codes (#1507) --- straxen/plugins/events/event_basics_som.py | 4 ---- straxen/plugins/events/event_basics_vanilla.py | 9 ++++----- straxen/plugins/peaks/peak_positions.py | 2 +- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/straxen/plugins/events/event_basics_som.py b/straxen/plugins/events/event_basics_som.py index faf5529fc..ed653fd96 100644 --- a/straxen/plugins/events/event_basics_som.py +++ b/straxen/plugins/events/event_basics_som.py @@ -25,7 +25,3 @@ def _set_dtype_requirements(self): ("loc_y_som", np.int16, "y location of the peak(let) in the SOM"), ] self.peak_properties = tuple(self.peak_properties) - - def compute(self, events, peaks): - result = super().compute(events, peaks) - return result diff --git a/straxen/plugins/events/event_basics_vanilla.py b/straxen/plugins/events/event_basics_vanilla.py index fb3842a52..ec7703776 100644 --- a/straxen/plugins/events/event_basics_vanilla.py +++ b/straxen/plugins/events/event_basics_vanilla.py @@ -216,19 +216,18 @@ def compute(self, events, peaks): result["endtime"] = events["endtime"] result["event_number"] = events["event_number"] - self.fill_events(result, events, split_peaks) + self.fill_events(result, split_peaks) return result # If copy_largest_peaks_into_event is ever numbafied, also numbafy this function - def fill_events(self, result_buffer, events, split_peaks): + def fill_events(self, result_buffer, split_peaks): """Loop over the events and peaks within that event.""" - for event_i, _ in enumerate(events): - peaks_in_event_i = split_peaks[event_i] + for event_i, peaks_in_event_i in enumerate(split_peaks): n_peaks = len(peaks_in_event_i) result_buffer[event_i]["n_peaks"] = n_peaks if not n_peaks: - raise ValueError(f"No peaks within event?\n{events[event_i]}") + raise ValueError(f"No peaks within event {event_i}?") self.fill_result_i(result_buffer[event_i], peaks_in_event_i) diff --git a/straxen/plugins/peaks/peak_positions.py b/straxen/plugins/peaks/peak_positions.py index 1cab2b6be..a99ab8a2e 100644 --- a/straxen/plugins/peaks/peak_positions.py +++ b/straxen/plugins/peaks/peak_positions.py @@ -34,7 +34,7 @@ class PeakPositions(strax.MergeOnlyPlugin): ) def infer_dtype(self): - dtype = strax.merged_dtype([self.deps[d].dtype_for(d) for d in self.depends_on]) + dtype = strax.merged_dtype([self.deps[d].dtype_for(d) for d in sorted(self.depends_on)]) dtype += [ ("x", np.float32, "Reconstructed S2 X position (cm), uncorrected"), ("y", np.float32, "Reconstructed S2 Y position (cm), uncorrected"), From 5bb4e0494d02424ffb0563f758c6ffb644dfe4d8 Mon Sep 17 00:00:00 2001 From: Dacheng Xu Date: Mon, 23 Dec 2024 20:22:58 -0500 Subject: [PATCH 06/10] Clean chunk after computing `records` (#1508) * Clean chunk after computing `records` * Fix bug * Minor change --- straxen/plugins/records/records.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/straxen/plugins/records/records.py b/straxen/plugins/records/records.py index ad28fe11c..56be3e3ca 100644 --- a/straxen/plugins/records/records.py +++ b/straxen/plugins/records/records.py @@ -36,6 +36,9 @@ class PulseProcessing(strax.Plugin): rechunk_on_save = immutabledict(records=False, veto_regions=True, pulse_counts=True) compressor = "zstd" + # remove the heavy raw_records data from memory after processing + clean_chunk_after_compute = True + depends_on = "raw_records" provides: Tuple[str, ...] = ("records", "veto_regions", "pulse_counts") From dfcf36bfec02b3c7342d01d2eb816533a5dc428c Mon Sep 17 00:00:00 2001 From: Dacheng Xu Date: Tue, 24 Dec 2024 14:09:18 -0500 Subject: [PATCH 07/10] Add a line of comment about memory optimization (#1509) * Add a line of comment about memory optimization * Minor change --- straxen/plugins/peaklets/peaklets.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/straxen/plugins/peaklets/peaklets.py b/straxen/plugins/peaklets/peaklets.py index 271d07af6..c765da8f1 100644 --- a/straxen/plugins/peaklets/peaklets.py +++ b/straxen/plugins/peaklets/peaklets.py @@ -120,7 +120,7 @@ class Peaklets(strax.Plugin): ) store_data_start = straxen.URLConfig( - default=True, type=bool, help="Save the start time of the waveform with 10 ns dt" + default=True, type=bool, help="Save the start time of the waveform with minimum dt" ) saturation_correction_on = straxen.URLConfig( @@ -250,6 +250,8 @@ def compute(self, records, start, end): # including the left and right extension. # (We are not going to use the actual hitlet data_type here.) hitlets = hits + # This line will not clean the memory, but only prevent misinterpretation + # only if sys.getrefcount(hits) - 1 is 1, we can clean the memory del hits # Extend hits into hitlets and clip at chunk boundaries: From 9815352afb0bd028cbfe6f47f07f09ce987989c4 Mon Sep 17 00:00:00 2001 From: Dacheng Xu Date: Fri, 27 Dec 2024 16:36:14 -0500 Subject: [PATCH 08/10] Bump to v3.0.1 (#1510) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update HISTORY.md * Bump version: 3.0.0 → 3.0.1 * Update dependency --- .bumpversion.cfg | 2 +- HISTORY.md | 39 +++++++++++++++++++++++++++++++++++++++ pyproject.toml | 4 ++-- straxen/__init__.py | 2 +- 4 files changed, 43 insertions(+), 4 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 7c0ba3725..4c1feebb1 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 3.0.0 +current_version = 3.0.1 files = straxen/__init__.py commit = True tag = True diff --git a/HISTORY.md b/HISTORY.md index 33a0b6c0f..dad348eca 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,3 +1,42 @@ +3.0.1 / 2024-12-27 +------------------- +* Fix run_doc for led plugin by @GiovanniVolta in https://github.com/XENONnT/straxen/pull/1462 +* Check RSE in `_find` method of `RucioRemoteFrontend` by @dachengx in https://github.com/XENONnT/straxen/pull/1464 +* Garbage collection after calculated each chunk in `peak_positions_mlp` by @dachengx in https://github.com/XENONnT/straxen/pull/1467 +* Enforce stable sorting in `np.sort` and `np.argsort` by @dachengx in https://github.com/XENONnT/straxen/pull/1468 +* Clean `deprecate_kwarg` by @dachengx in https://github.com/XENONnT/straxen/pull/1470 +* Update strax version to v2.0.1 by @dachengx in https://github.com/XENONnT/straxen/pull/1473 +* Remove expedients plugins because SOM will be default by @dachengx in https://github.com/XENONnT/straxen/pull/1472 +* Remove 1T related codes by @dachengx in https://github.com/XENONnT/straxen/pull/1476 +* Use SOM peaklets classification by default by @dachengx in https://github.com/XENONnT/straxen/pull/1471 +* Fix theta uncertainty bug by @napoliion in https://github.com/XENONnT/straxen/pull/1466 +* Remove URLConfig warning about sorting by @dachengx in https://github.com/XENONnT/straxen/pull/1477 +* Merge branch 'sr1_leftovers' into master by @dachengx in https://github.com/XENONnT/straxen/pull/1478 +* Fix small bug in CNF by @dachengx in https://github.com/XENONnT/straxen/pull/1479 +* Remove GCN & CNN S2 pos-rec by @dachengx in https://github.com/XENONnT/straxen/pull/1484 +* Set CNF as the default S2 (x, y) position-reconstruction by @dachengx in https://github.com/XENONnT/straxen/pull/1486 +* Prototype of peaklets-level (x, y) S2 position reconstruction by @dachengx in https://github.com/XENONnT/straxen/pull/1482 +* Rename old `PeakletClassification` as `PeakletClassificationVanilla` by @dachengx in https://github.com/XENONnT/straxen/pull/1487 +* Remove Bayes models by @dachengx in https://github.com/XENONnT/straxen/pull/1488 +* Rename `defualt_run_comments` -> `default_run_comments` by @dachengx in https://github.com/XENONnT/straxen/pull/1489 +* Accelerate Euclidean distance by numba by @dachengx in https://github.com/XENONnT/straxen/pull/1493 +* Move `set_nan_defaults` to be a stand-alone function by @dachengx in https://github.com/XENONnT/straxen/pull/1497 +* Set CNF as the default S2 (x, y) position-reconstruction by @dachengx in https://github.com/XENONnT/straxen/pull/1494 +* Back to fixed window in LED calibration by @GiovanniVolta in https://github.com/XENONnT/straxen/pull/1499 +* Move `compute_center_times` from straxen to strax by @dachengx in https://github.com/XENONnT/straxen/pull/1501 +* Use numpy and strax native dtypes, not `" Date: Fri, 27 Dec 2024 19:51:14 -0500 Subject: [PATCH 09/10] Collect SOM dtype at one place (#1511) --- straxen/plugins/events/event_basics_som.py | 12 ++------ .../peaklets/peaklet_classification_som.py | 30 ++++++++++++------- straxen/plugins/peaks/peak_basics_som.py | 13 ++------ straxen/plugins/peaks/peaks_som.py | 2 +- 4 files changed, 25 insertions(+), 32 deletions(-) diff --git a/straxen/plugins/events/event_basics_som.py b/straxen/plugins/events/event_basics_som.py index ed653fd96..f236e5fb3 100644 --- a/straxen/plugins/events/event_basics_som.py +++ b/straxen/plugins/events/event_basics_som.py @@ -1,7 +1,6 @@ import strax -import numpy as np - from straxen.plugins.events.event_basics_vanilla import EventBasicsVanilla +from straxen.plugins.peaklets.peaklet_classification_som import som_additional_fields export, __all__ = strax.exporter() @@ -17,11 +16,4 @@ def _set_dtype_requirements(self): # Properties to store for each peak (main and alternate S1 and S2) # Add here SOM types: super()._set_dtype_requirements() - self.peak_properties = list(self.peak_properties) - self.peak_properties += [ - ("som_sub_type", np.int32, "SOM subtype of the peak(let)"), - ("old_type", np.int8, "Old type of the peak(let)"), - ("loc_x_som", np.int16, "x location of the peak(let) in the SOM"), - ("loc_y_som", np.int16, "y location of the peak(let) in the SOM"), - ] - self.peak_properties = tuple(self.peak_properties) + self.peak_properties += tuple(som_additional_fields) diff --git a/straxen/plugins/peaklets/peaklet_classification_som.py b/straxen/plugins/peaklets/peaklet_classification_som.py index fad9fa8fd..81b693839 100644 --- a/straxen/plugins/peaklets/peaklet_classification_som.py +++ b/straxen/plugins/peaklets/peaklet_classification_som.py @@ -8,6 +8,16 @@ export, __all__ = strax.exporter() +__all__.extend(["som_additional_fields"]) + + +som_additional_fields = [ + ("som_sub_type", np.int32, "SOM subtype of the peak(let)"), + ("vanilla_type", np.int8, "Vanilla type of the peak(let)"), + ("loc_x_som", np.int16, "x location of the peak(let) in the SOM"), + ("loc_y_som", np.int16, "y location of the peak(let) in the SOM"), +] + @export class PeakletClassificationSOM(PeakletClassificationVanilla): @@ -30,14 +40,13 @@ class PeakletClassificationSOM(PeakletClassificationVanilla): __version__ = "0.2.0" child_plugin = True - dtype = strax.peak_interval_dtype + [ - ("type", np.int8, "Classification of the peak(let)"), - ("som_sub_type", np.int32, "SOM subtype of the peak(let)"), - ("old_type", np.int8, "Old type of the peak(let)"), - ("som_type", np.int8, "SOM type of the peak(let)"), - ("loc_x_som", np.int16, "x location of the peak(let) in the SOM"), - ("loc_y_som", np.int16, "y location of the peak(let) in the SOM"), - ] + dtype = ( + strax.peak_interval_dtype + + [ + ("type", np.int8, "Classification of the peak(let)"), + ] + + som_additional_fields + ) som_files = straxen.URLConfig( default="resource://xedocs://som_classifiers?attr=value&version=v1&run_id=045000&fmt=npy" @@ -67,7 +76,7 @@ def compute(self, peaklets): peaklet_with_som = np.zeros(len(peaklets_classifcation), dtype=self.dtype) strax.copy_to_buffer(peaklets_classifcation, peaklet_with_som, "_copy_peaklets_information") - peaklet_with_som["old_type"] = peaklets_classifcation["type"] + peaklet_with_som["vanilla_type"] = peaklets_classifcation["type"] del peaklets_classifcation # SOM classification @@ -86,11 +95,10 @@ def compute(self, peaklets): peaklet_with_som["som_sub_type"][_is_s1_or_s2] = som_sub_type peaklet_with_som["loc_x_som"][_is_s1_or_s2] = x_som peaklet_with_som["loc_y_som"][_is_s1_or_s2] = y_som - peaklet_with_som["som_type"][_is_s1_or_s2] = strax_type if self.use_som_as_default: peaklet_with_som["type"][_is_s1_or_s2] = strax_type else: - peaklet_with_som["type"] = peaklet_with_som["old_type"] + peaklet_with_som["type"] = peaklet_with_som["vanilla_type"] return peaklet_with_som diff --git a/straxen/plugins/peaks/peak_basics_som.py b/straxen/plugins/peaks/peak_basics_som.py index b4df54e3a..105f8ac99 100644 --- a/straxen/plugins/peaks/peak_basics_som.py +++ b/straxen/plugins/peaks/peak_basics_som.py @@ -1,5 +1,5 @@ -import numpy as np import strax +from straxen.plugins.peaklets.peaklet_classification_som import som_additional_fields from straxen.plugins.peaks.peak_basics_vanilla import PeakBasicsVanilla export, __all__ = strax.exporter() @@ -14,17 +14,10 @@ class PeakBasicsSOM(PeakBasicsVanilla): def infer_dtype(self): dtype = super().infer_dtype() - additional_fields = [ - ("som_sub_type", np.int32, "SOM subtype of the peak(let)"), - ("old_type", np.int8, "Old type of the peak(let)"), - ("loc_x_som", np.int16, "x location of the peak(let) in the SOM"), - ("loc_y_som", np.int16, "y location of the peak(let) in the SOM"), - ] - - return dtype + additional_fields + return dtype + som_additional_fields def compute(self, peaks): peak_basics = super().compute(peaks) - fields_to_copy = ("som_sub_type", "old_type", "loc_x_som", "loc_y_som") + fields_to_copy = strax.to_numpy_dtype(som_additional_fields).names strax.copy_to_buffer(peaks, peak_basics, "_copy_som_information", fields_to_copy) return peak_basics diff --git a/straxen/plugins/peaks/peaks_som.py b/straxen/plugins/peaks/peaks_som.py index fb428a97e..8a879cd7c 100644 --- a/straxen/plugins/peaks/peaks_som.py +++ b/straxen/plugins/peaks/peaks_som.py @@ -34,7 +34,7 @@ def compute(self, peaklets, merged_s2s): _is_merged_s2 = np.isin(result["time"], merged_s2s["time"]) & np.isin( strax.endtime(result), strax.endtime(merged_s2s) ) - result["old_type"][_is_merged_s2] = -1 + result["vanilla_type"][_is_merged_s2] = -1 result["som_sub_type"][_is_merged_s2] = -1 return result From f993f0b573db3354e4f1de60e89e89a1ccf28892 Mon Sep 17 00:00:00 2001 From: Dacheng Xu Date: Thu, 9 Jan 2025 22:45:59 -0500 Subject: [PATCH 10/10] Stop support for list of "take" protocol (#1517) * Stop support for list of "take" protocol * Try to bypass the check of URL * Debug * Debug --- straxen/config/protocols.py | 2 ++ tests/test_url_config.py | 29 ++++++++++++++++++----------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/straxen/config/protocols.py b/straxen/config/protocols.py index 76d6773fe..b351da166 100644 --- a/straxen/config/protocols.py +++ b/straxen/config/protocols.py @@ -65,6 +65,8 @@ def read_json(content: str, **kwargs): @URLConfig.register("take") def get_key(container: Container, take=None, **kwargs): """Return a single element of a container.""" + if not isinstance(container, dict): + raise ValueError(f"Container is not a dict but a {type(container)}") if take is None: return container if not isinstance(take, list): diff --git a/tests/test_url_config.py b/tests/test_url_config.py index 12af3476f..d004b4f61 100644 --- a/tests/test_url_config.py +++ b/tests/test_url_config.py @@ -25,6 +25,12 @@ def generate_random(_): return random.random() +@straxen.URLConfig.register("range") +def generate_range(length): + length = int(length) + return np.arange(length) + + @straxen.URLConfig.register("unpicklable") def return_lamba(_): return lambda x: x @@ -118,9 +124,9 @@ def test_cmt_protocol(self): self.assertTrue(abs(p.test_config - 219203.49884000001) < 1e-2) def test_json_protocol(self): - self.st.set_config({"test_config": "json://[1,2,3]"}) + self.st.set_config({"test_config": 'json://{"a":0}'}) p = self.st.get_single_plugin(nt_test_run_id, "test_data") - self.assertEqual(p.test_config, [1, 2, 3]) + self.assertEqual(p.test_config, {"a": 0}) def test_format_protocol(self): self.st.set_config({"test_config": "format://{run_id}?run_id=plugin.run_id"}) @@ -136,15 +142,16 @@ def test_fsspec_protocol(self): p = self.st.get_single_plugin(nt_test_run_id, "test_data") self.assertEqual(p.test_config, 999) - def test_chained(self): - self.st.set_config({"test_config": "take://json://[1,2,3]?take=0"}) - p = self.st.get_single_plugin(nt_test_run_id, "test_data") - self.assertEqual(p.test_config, 1) - def test_take_nested(self): - self.st.set_config({"test_config": 'take://json://{"a":[1,2,3]}?take=a&take=0'}) + self.st.set_config( + { + "test_config": ( + 'take://json://{"a":{"aa":0,"ab":1},"b":{"ba":2,"bb":3}}?take=b&take=ba' + ) + } + ) p = self.st.get_single_plugin(nt_test_run_id, "test_data") - self.assertEqual(p.test_config, 1) + self.assertEqual(p.test_config, 2) @unittest.skipIf(not straxen.utilix_is_configured(), "No db access, cannot test!") def test_bodedga_get(self): @@ -434,9 +441,9 @@ def test_regex_url_warnings(self): def test_pad_array(self): """Test that pad_array works as expected.""" - + n = 3 self.st.set_config( - {"test_config": "pad-array://json://[1,2,3]?pad_left=2&pad_right=3&pad_value=0"} + {"test_config": f"pad-array://range://{n}?pad_left=2&pad_right=3&pad_value=0"} ) p = self.st.get_single_plugin(nt_test_run_id, "test_data") self.assertEqual(len(p.test_config), 8)