From 0e03e0b056b2a81805bc48fe837a531e73af69de Mon Sep 17 00:00:00 2001 From: ocnkr Date: Thu, 6 Apr 2023 15:12:34 -0400 Subject: [PATCH 1/9] add pattern detection --- pipit/trace.py | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/pipit/trace.py b/pipit/trace.py index c82c3040..9b2b48e4 100644 --- a/pipit/trace.py +++ b/pipit/trace.py @@ -802,3 +802,82 @@ def multirun_analysis( combined_df = combined_df[function_sums.sort_values(ascending=False).index] return combined_df + + def detect_pattern( + self, + process=0, + start_event="time-loop", + metric="time.exc", + plot=True, + filename="pattern_detection.png", + ): + import stumpy + + self.calc_exc_metrics() + df = self.events.copy(deep=True) + + iterations = len( + df[ + (df["Name"] == start_event) + & (df["Event Type"] == "Enter") + & (df["Process"] == process) + ] + ) + + df = df[df["Process"] == process] + df.reset_index(inplace=True) + + first_loop_enter = df[ + (df["Name"] == start_event) + & (df["Event Type"] == "Enter") + & (df["Process"] == process) + ].index[0] + + last_loop_leave = df[ + (df["Name"] == start_event) + & (df["Event Type"] == "Leave") + & (df["Process"] == process) + ].index[-1] + + df = df.iloc[first_loop_enter + 1 : last_loop_leave] + + x = df.loc[(df[metric].notnull()) & (df["Process"] == process)][ + "Timestamp (ns)" + ].values[:] + y = df.loc[(df[metric].notnull()) & (df["Process"] == process)][metric].values[ + : + ] + + length_of_seq = int(len(y) / iterations) + matrix_profile = stumpy.stump(y, length_of_seq) + dists, inde = stumpy.motifs(y, matrix_profile[:, 0], max_matches=iterations) + + if plot: + import matplotlib.pyplot as plt + from matplotlib.patches import Rectangle + + fig, axs = plt.subplots(2, sharex=True, gridspec_kw={"hspace": 0}) + plt.suptitle("Pattern Detection", fontsize="20") + + axs[0].plot(y) + axs[0].set_ylabel("Time", fontsize="14") + + for idx in inde[0]: + print(idx, length_of_seq) + rect = Rectangle( + (idx, 0), + length_of_seq * 0.98, + y.max(), + fill=True, + facecolor="lightgrey", + ) + axs[0].add_patch(rect) + + axs[1].set_xlabel("Index", fontsize="14") + axs[1].set_ylabel("Matrix Profile", fontsize="14") + + for idx in inde[0]: + axs[1].axvline(x=idx, linestyle="dashed") + + axs[1].plot(matrix_profile[:, 0]) + plt.savefig(filename) From bd948e85985f016320d7a991368f42514adde8a0 Mon Sep 17 00:00:00 2001 From: ocnkr Date: Thu, 6 Apr 2023 19:37:25 -0400 Subject: [PATCH 2/9] finalize pattern detection --- pipit/trace.py | 53 ++++++++++++++++++++++---------------------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/pipit/trace.py b/pipit/trace.py index 9b2b48e4..0f982628 100644 --- a/pipit/trace.py +++ b/pipit/trace.py @@ -808,8 +808,6 @@ def detect_pattern( process=0, start_event="time-loop", metric="time.exc", - plot=True, - filename="pattern_detection.png", ): import stumpy @@ -840,6 +838,9 @@ def detect_pattern( ].index[-1] df = df.iloc[first_loop_enter + 1 : last_loop_leave] + filtered_df = df.loc[ + (df[metric].notnull()) & (df["Process"] == process) + ] # & (df["Name"] != "time-loop") x = df.loc[(df[metric].notnull()) & (df["Process"] == process)][ "Timestamp (ns)" @@ -850,34 +851,26 @@ def detect_pattern( length_of_seq = int(len(y) / iterations) matrix_profile = stumpy.stump(y, length_of_seq) - dists, inde = stumpy.motifs(y, matrix_profile[:, 0], max_matches=iterations) - - if plot: - import matplotlib.pyplot as plt - from matplotlib.patches import Rectangle - - fig, axs = plt.subplots(2, sharex=True, gridspec_kw={"hspace": 0}) - plt.suptitle("Pattern Detection", fontsize="20") - - axs[0].plot(y) - axs[0].set_ylabel("Time", fontsize="14") - - for idx in inde[0]: - print(idx, length_of_seq) - rect = Rectangle( - (idx, 0), - length_of_seq * 0.98, - y.max(), - fill=True, - facecolor="lightgrey", - ) - axs[0].add_patch(rect) + dists, indices = stumpy.motifs(y, matrix_profile[:, 0], max_matches=iterations) - axs[1].set_xlabel("Index", fontsize="14") - axs[1].set_ylabel("Matrix Profile", fontsize="14") + # find the matches on the given trace + match_original = self.events.loc[ + self.events["Timestamp (ns)"].isin( + filtered_df.iloc[indices[0]]["Timestamp (ns)"].values + ) + ] - for idx in inde[0]: - axs[1].axvline(x=idx, linestyle="dashed") + # filter out the events happening before the start of the + # iteration and after the end of the iteration. + self.events = self.events[ + (self.events["Timestamp (ns)"] >= match_original.iloc[0]["Timestamp (ns)"]) + & ( + self.events["Timestamp (ns)"] + <= self.events.iloc[match_original.iloc[-1]._matching_event][ + "Timestamp (ns)" + ] + ) + | (self.events["Name"] == match_original.iloc[0]["Name"]) + ] - axs[1].plot(matrix_profile[:, 0]) - plt.savefig(filename) + return (match_original, indices, matrix_profile, length_of_seq) From c69c0b75b5de9a8249785f0db14440cf65b1fc17 Mon Sep 17 00:00:00 2001 From: ocnkr Date: Thu, 20 Apr 2023 12:21:13 -0400 Subject: [PATCH 3/9] update parameters --- pipit/trace.py | 91 +++++++++++++++++--------------------------------- 1 file changed, 31 insertions(+), 60 deletions(-) diff --git a/pipit/trace.py b/pipit/trace.py index 0f982628..cf855109 100644 --- a/pipit/trace.py +++ b/pipit/trace.py @@ -811,66 +811,37 @@ def detect_pattern( ): import stumpy - self.calc_exc_metrics() - df = self.events.copy(deep=True) - - iterations = len( - df[ - (df["Name"] == start_event) - & (df["Event Type"] == "Enter") - & (df["Process"] == process) +def detect_pattern( + self, data, iterations=None, window_size=None, process=0, metric="time.exc" +): + import stumpy + + y = data.loc[(data["time.exc"].notnull()) & (data["Process"] == 0)][ + "time.exc" + ].values[:] + matrix_profile = stumpy.stump(y, window_size) + dists, indices = stumpy.motifs(y, matrix_profile[:, 0], max_matches=iterations) + + filtered_df = data.loc[(data[metric].notnull()) & (data["Process"] == process)] + + # find the matches on the given trace + match_original = self.events.loc[ + self.events["Timestamp (ns)"].isin( + filtered_df.iloc[indices[0]]["Timestamp (ns)"].values + ) + ] + + # filter out the events happening before the start of the + # iteration and after the end of the iteration. + self.events = self.events[ + (self.events["Timestamp (ns)"] >= match_original.iloc[0]["Timestamp (ns)"]) + & ( + self.events["Timestamp (ns)"] + <= self.events.iloc[match_original.iloc[-1]._matching_event][ + "Timestamp (ns)" ] ) + | (self.events["Name"] == match_original.iloc[0]["Name"]) + ] - df = df[df["Process"] == process] - df.reset_index(inplace=True) - - first_loop_enter = df[ - (df["Name"] == start_event) - & (df["Event Type"] == "Enter") - & (df["Process"] == process) - ].index[0] - - last_loop_leave = df[ - (df["Name"] == start_event) - & (df["Event Type"] == "Leave") - & (df["Process"] == process) - ].index[-1] - - df = df.iloc[first_loop_enter + 1 : last_loop_leave] - filtered_df = df.loc[ - (df[metric].notnull()) & (df["Process"] == process) - ] # & (df["Name"] != "time-loop") - - x = df.loc[(df[metric].notnull()) & (df["Process"] == process)][ - "Timestamp (ns)" - ].values[:] - y = df.loc[(df[metric].notnull()) & (df["Process"] == process)][metric].values[ - : - ] - - length_of_seq = int(len(y) / iterations) - matrix_profile = stumpy.stump(y, length_of_seq) - dists, indices = stumpy.motifs(y, matrix_profile[:, 0], max_matches=iterations) - - # find the matches on the given trace - match_original = self.events.loc[ - self.events["Timestamp (ns)"].isin( - filtered_df.iloc[indices[0]]["Timestamp (ns)"].values - ) - ] - - # filter out the events happening before the start of the - # iteration and after the end of the iteration. - self.events = self.events[ - (self.events["Timestamp (ns)"] >= match_original.iloc[0]["Timestamp (ns)"]) - & ( - self.events["Timestamp (ns)"] - <= self.events.iloc[match_original.iloc[-1]._matching_event][ - "Timestamp (ns)" - ] - ) - | (self.events["Name"] == match_original.iloc[0]["Name"]) - ] - - return (match_original, indices, matrix_profile, length_of_seq) + return match_original From bb6b1bcde6ca7887626b043b9f0f607c1a579de5 Mon Sep 17 00:00:00 2001 From: ocnkr Date: Sun, 12 Nov 2023 12:31:49 -0500 Subject: [PATCH 4/9] return separate dataframes correspond to different patterns --- pipit/trace.py | 84 +++++++++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 35 deletions(-) diff --git a/pipit/trace.py b/pipit/trace.py index cf855109..69720322 100644 --- a/pipit/trace.py +++ b/pipit/trace.py @@ -804,44 +804,58 @@ def multirun_analysis( return combined_df def detect_pattern( - self, - process=0, - start_event="time-loop", - metric="time.exc", + self, start_event, iterations=None, window_size=None, process=0, metric="time.exc" ): import stumpy -def detect_pattern( - self, data, iterations=None, window_size=None, process=0, metric="time.exc" -): - import stumpy - - y = data.loc[(data["time.exc"].notnull()) & (data["Process"] == 0)][ - "time.exc" - ].values[:] - matrix_profile = stumpy.stump(y, window_size) - dists, indices = stumpy.motifs(y, matrix_profile[:, 0], max_matches=iterations) - - filtered_df = data.loc[(data[metric].notnull()) & (data["Process"] == process)] + # count the number of enter events to + # determine the number of iterations if it's not + # given by the user. + if iterations is None: + iterations = len( + self.events[ + (self.events["Name"] == start_event) + & (self.events["Event Type"] == "Enter") + & (self.events["Process"] == process) + ] + ) - # find the matches on the given trace - match_original = self.events.loc[ - self.events["Timestamp (ns)"].isin( - filtered_df.iloc[indices[0]]["Timestamp (ns)"].values - ) - ] - - # filter out the events happening before the start of the - # iteration and after the end of the iteration. - self.events = self.events[ - (self.events["Timestamp (ns)"] >= match_original.iloc[0]["Timestamp (ns)"]) - & ( - self.events["Timestamp (ns)"] - <= self.events.iloc[match_original.iloc[-1]._matching_event][ - "Timestamp (ns)" + # get the first enter and last leave of + # the given event. we will only investigate + # this portion of the data. + first_loop_enter = self.events[ + (self.events["Name"] == start_event) + & (self.events["Event Type"] == "Enter") + & (self.events["Process"] == process) + ].index[0] + + last_loop_leave = self.events[ + (self.events["Name"] == start_event) + & (self.events["Event Type"] == "Leave") + & (self.events["Process"] == process) + ].index[-1] + + df = self.events.iloc[first_loop_enter + 1 : last_loop_leave] + filtered_df = df.loc[(df[metric].notnull()) & (df["Process"] == process)] + y = filtered_df[metric].values[:] + + if window_size is None: + window_size = int(len(y) / iterations) + + matrix_profile = stumpy.stump(y, window_size) + dists, indices = stumpy.motifs(y, matrix_profile[:, 0], max_matches=iterations) + + # Gets the corresponding portion from the original + # dataframe for each pattern. + patterns = [] + for idx in indices[0]: + end_idx = idx+window_size + + match_original = self.events.loc[ + self.events["Timestamp (ns)"].isin( + filtered_df.iloc[idx:end_idx]["Timestamp (ns)"].values + ) ] - ) - | (self.events["Name"] == match_original.iloc[0]["Name"]) - ] + patterns.append(match_original) - return match_original + return patterns From 12f1c573ef5441beb35f2dbb60a15b66b86b949f Mon Sep 17 00:00:00 2001 From: ocnkr Date: Sun, 12 Nov 2023 12:41:33 -0500 Subject: [PATCH 5/9] formatting --- pipit/trace.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/pipit/trace.py b/pipit/trace.py index 69720322..9684bbfb 100644 --- a/pipit/trace.py +++ b/pipit/trace.py @@ -72,7 +72,7 @@ def from_csv(filename): # if timestamps are in seconds, convert them to nanoseconds if "Timestamp (s)" in events_dataframe.columns: - events_dataframe["Timestamp (s)"] *= 10**9 + events_dataframe["Timestamp (s)"] *= 10 ** 9 events_dataframe.rename( columns={"Timestamp (s)": "Timestamp (ns)"}, inplace=True ) @@ -804,12 +804,17 @@ def multirun_analysis( return combined_df def detect_pattern( - self, start_event, iterations=None, window_size=None, process=0, metric="time.exc" + self, + start_event, + iterations=None, + window_size=None, + process=0, + metric="time.exc", ): import stumpy - # count the number of enter events to - # determine the number of iterations if it's not + # count the number of enter events to + # determine the number of iterations if it's not # given by the user. if iterations is None: iterations = len( @@ -820,8 +825,8 @@ def detect_pattern( ] ) - # get the first enter and last leave of - # the given event. we will only investigate + # get the first enter and last leave of + # the given event. we will only investigate # this portion of the data. first_loop_enter = self.events[ (self.events["Name"] == start_event) @@ -836,7 +841,7 @@ def detect_pattern( ].index[-1] df = self.events.iloc[first_loop_enter + 1 : last_loop_leave] - filtered_df = df.loc[(df[metric].notnull()) & (df["Process"] == process)] + filtered_df = df.loc[(df[metric].notnull()) & (df["Process"] == process)] y = filtered_df[metric].values[:] if window_size is None: @@ -845,11 +850,11 @@ def detect_pattern( matrix_profile = stumpy.stump(y, window_size) dists, indices = stumpy.motifs(y, matrix_profile[:, 0], max_matches=iterations) - # Gets the corresponding portion from the original + # Gets the corresponding portion from the original # dataframe for each pattern. patterns = [] for idx in indices[0]: - end_idx = idx+window_size + end_idx = idx + window_size match_original = self.events.loc[ self.events["Timestamp (ns)"].isin( From 849ef4377fe0560cdf5ce01fc1bda0730bfbf0f3 Mon Sep 17 00:00:00 2001 From: ocnkr Date: Sun, 12 Nov 2023 12:44:06 -0500 Subject: [PATCH 6/9] formatting --- pipit/trace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipit/trace.py b/pipit/trace.py index 9684bbfb..43ce5a6c 100644 --- a/pipit/trace.py +++ b/pipit/trace.py @@ -72,7 +72,7 @@ def from_csv(filename): # if timestamps are in seconds, convert them to nanoseconds if "Timestamp (s)" in events_dataframe.columns: - events_dataframe["Timestamp (s)"] *= 10 ** 9 + events_dataframe["Timestamp (s)"] *= 10**9 events_dataframe.rename( columns={"Timestamp (s)": "Timestamp (ns)"}, inplace=True ) From fcc8b8bb423e76e96ec8f391bea92105552cd42a Mon Sep 17 00:00:00 2001 From: ocnkr Date: Mon, 13 Nov 2023 12:49:40 -0500 Subject: [PATCH 7/9] remove repeated lines --- pipit/trace.py | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/pipit/trace.py b/pipit/trace.py index 43ce5a6c..4c7172bd 100644 --- a/pipit/trace.py +++ b/pipit/trace.py @@ -813,34 +813,32 @@ def detect_pattern( ): import stumpy - # count the number of enter events to - # determine the number of iterations if it's not - # given by the user. - if iterations is None: - iterations = len( - self.events[ + enter_events = self.events[ (self.events["Name"] == start_event) & (self.events["Event Type"] == "Enter") & (self.events["Process"] == process) ] - ) + + leave_events = self.events[ + (self.events["Name"] == start_event) + & (self.events["Event Type"] == "Leave") + & (self.events["Process"] == process) + ] + + # count the number of enter events to + # determine the number of iterations if it's not + # given by the user. + if iterations is None: + iterations = len(enter_events) # get the first enter and last leave of # the given event. we will only investigate # this portion of the data. - first_loop_enter = self.events[ - (self.events["Name"] == start_event) - & (self.events["Event Type"] == "Enter") - & (self.events["Process"] == process) - ].index[0] + first_loop_enter = enter_events.index[0] + last_loop_leave = leave_events.index[-1] - last_loop_leave = self.events[ - (self.events["Name"] == start_event) - & (self.events["Event Type"] == "Leave") - & (self.events["Process"] == process) - ].index[-1] - df = self.events.iloc[first_loop_enter + 1 : last_loop_leave] + df = self.events.iloc[first_loop_enter + 1: last_loop_leave] filtered_df = df.loc[(df[metric].notnull()) & (df["Process"] == process)] y = filtered_df[metric].values[:] From 33f347711e3ab1fdeb125d25600c40a8ad2a5ac2 Mon Sep 17 00:00:00 2001 From: ocnkr Date: Mon, 13 Nov 2023 12:51:10 -0500 Subject: [PATCH 8/9] format --- pipit/trace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipit/trace.py b/pipit/trace.py index 4c7172bd..baaaf7de 100644 --- a/pipit/trace.py +++ b/pipit/trace.py @@ -838,7 +838,7 @@ def detect_pattern( last_loop_leave = leave_events.index[-1] - df = self.events.iloc[first_loop_enter + 1: last_loop_leave] + df = self.events.iloc[first_loop_enter + 1 : last_loop_leave] filtered_df = df.loc[(df[metric].notnull()) & (df["Process"] == process)] y = filtered_df[metric].values[:] From b5f61bcfe6f83af34fb6ba5f4c57dedbd48d2dad Mon Sep 17 00:00:00 2001 From: ocnkr Date: Mon, 13 Nov 2023 12:53:13 -0500 Subject: [PATCH 9/9] format --- pipit/trace.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pipit/trace.py b/pipit/trace.py index baaaf7de..2b4f111a 100644 --- a/pipit/trace.py +++ b/pipit/trace.py @@ -814,11 +814,11 @@ def detect_pattern( import stumpy enter_events = self.events[ - (self.events["Name"] == start_event) - & (self.events["Event Type"] == "Enter") - & (self.events["Process"] == process) - ] - + (self.events["Name"] == start_event) + & (self.events["Event Type"] == "Enter") + & (self.events["Process"] == process) + ] + leave_events = self.events[ (self.events["Name"] == start_event) & (self.events["Event Type"] == "Leave") @@ -837,7 +837,6 @@ def detect_pattern( first_loop_enter = enter_events.index[0] last_loop_leave = leave_events.index[-1] - df = self.events.iloc[first_loop_enter + 1 : last_loop_leave] filtered_df = df.loc[(df[metric].notnull()) & (df["Process"] == process)] y = filtered_df[metric].values[:]