Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stability test: collect uniq traces #13696

Merged
merged 6 commits into from
Jan 22, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions ydb/tests/stability/library/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import stat
import sys
import argparse
import re

from library.python import resource

Expand Down Expand Up @@ -59,6 +60,91 @@ def _unpack_resource(self, name):
os.chmod(path_to_unpack, st.st_mode | stat.S_IEXEC)
return path_to_unpack

def clean_trace(self, traces):
    """Reduce a raw trace to a normalized signature used for grouping.

    Every line first has its `` @ 0x<hex>`` address suffixes removed, then:

    * if the line contains an error marker (``VERIFY``, ``FAIL``,
      ``signal 11``, ``signal 6``, ``signal 15`` or ``uncaught exception``),
      only the marker text itself is kept;
    * otherwise, if the line contains a ``<name>.cpp:<number>`` reference
      preceded by whitespace, the line is kept from that whitespace to its
      end (everything before the reference is dropped);
    * any other line is discarded.

    Args:
        traces: Raw trace text with newline-separated lines.

    Returns:
        The kept fragments joined with a newline; an empty string when no
        line matched.
    """
    # Compile once per call instead of resolving each pattern on every line.
    address_re = re.compile(r' @ 0x[a-fA-F0-9]+')
    marker_re = re.compile(r'VERIFY|FAIL|signal 11|signal 6|signal 15|uncaught exception')
    code_ref_re = re.compile(r'\s+(\S+\.cpp:\d+).*')

    cleaned_lines = []
    for line in traces.split('\n'):
        # Addresses differ between runs and would make equal traces look distinct.
        line = address_re.sub('', line)

        marker = marker_re.search(line)
        if marker:
            cleaned_lines.append(marker.group())
            continue

        # Only look for a source reference when the line is not an error marker.
        code_ref = code_ref_re.search(line)
        if code_ref:
            cleaned_lines.append(code_ref.group())

    return "\n".join(cleaned_lines)

def is_sublist(self, shorter, longer):
    """Return True when ``longer`` begins with the whole of ``shorter``.

    This is a prefix comparison: the leading ``len(shorter)`` items of
    ``longer`` are sliced off and compared with ``shorter`` for equality.
    """
    head = longer[:len(shorter)]
    return head == shorter

def find_unique_traces_with_counts(self, all_traces):
    """Group raw traces by cleaned signature, folding truncated ones together.

    Each raw trace is reduced to a signature with ``self.clean_trace``.
    Signatures are then visited from shortest to longest; when an already
    known signature is a line-wise prefix of the current one (checked with
    ``self.is_sublist``), the known entry is replaced by the longer
    signature and their raw traces are combined.

    Whitespace-only chunks are ignored, and the empty signature is never
    treated as a prefix: it would match every other signature and wrongly
    inflate the count of an unrelated trace.

    Args:
        all_traces: Iterable of raw trace strings.

    Returns:
        Dict mapping each unique signature to the list of raw traces that
        belong to it, ordered by number of raw traces, largest first.
    """
    # Bucket raw traces by their cleaned signature.
    traces_by_signature = {}
    for raw_trace in all_traces:
        if not raw_trace.strip():
            # A blank chunk is a separator artifact, not a trace.
            continue
        signature = self.clean_trace(raw_trace)
        traces_by_signature.setdefault(signature, []).append(raw_trace)

    unique_traces = {}
    # Signature -> its lines, so prefixes are compared per line and
    # "a.cpp:1" is not mistaken for a prefix of "a.cpp:12".
    unique_lines = {}

    # Shortest first: a known signature can then only ever be a prefix of the
    # current one, never the other way round.
    for signature in sorted(traces_by_signature, key=len):
        raw_traces = traces_by_signature[signature]
        signature_lines = signature.split('\n')

        # Find the match before touching the dicts, so nothing is mutated
        # while it is being iterated.
        extended = None
        for known, known_lines in unique_lines.items():
            if known and self.is_sublist(known_lines, signature_lines):
                extended = known
                break

        if extended is None:
            unique_traces[signature] = raw_traces
        else:
            unique_traces[signature] = unique_traces.pop(extended) + raw_traces
            del unique_lines[extended]
        unique_lines[signature] = signature_lines

    return dict(sorted(unique_traces.items(), key=lambda item: len(item[1]), reverse=True))

def process_lines(self, text):
    """Split per-host grep output into individual trace chunks.

    Every element of ``text`` is split into lines. A line that is exactly
    ``--`` or empty ends the current chunk. Chunks are never empty, never
    span two hosts, and a chunk still open at the end of a host's output is
    kept rather than lost.

    Args:
        text: Iterable of strings, one per host.

    Returns:
        List of trace strings; each is the chunk's lines, every line
        terminated by a newline.
    """
    separators = ("--", "")
    traces = []
    for host_output in text:
        current = []
        for line in host_output.split('\n'):
            if line not in separators:
                current.append(line)
            elif current:
                # Consecutive separators must not produce empty traces.
                traces.append('\n'.join(current) + '\n')
                current = []
        if current:
            # Output without a trailing newline: flush the last chunk so it
            # is neither dropped nor glued to the next host's first trace.
            traces.append('\n'.join(current) + '\n')
    return traces

def get_all_errors(self):
    """Collect error traces from the logs of every cluster node.

    Sets the root logger level to WARNING, then runs a shell pipeline on
    each node in ``self.kikimr_cluster.nodes`` via ``node.ssh_command``.
    The pipeline lists files matching ``/Berkanavt/kikimr*/logs/kikimr*``,
    prints each one (``zcat`` for ``*.gz``, ``cat`` otherwise) and greps for
    the error markers, keeping 20 lines of context after every match.

    Returns:
        List of trace strings produced by ``self.process_lines`` from the
        decoded output of all nodes that returned something.
    """
    logging.getLogger().setLevel(logging.WARNING)
    all_results = []
    for node in self.kikimr_cluster.nodes.values():
        # raise_on_error=False is passed so a failing node does not abort
        # the run -- presumably a non-zero exit (e.g. grep finding nothing)
        # is then not raised; confirm against ssh_command.
        result = node.ssh_command("""
            ls -ltr /Berkanavt/kikimr*/logs/kikimr* |
            awk '{print $NF}' |
            while read file; do
            case "$file" in
            *.txt) cat "$file" ;;
            *.gz) zcat "$file" ;;
            *) cat "$file" ;;
            esac
            done |
            grep -E 'VERIFY|FAIL|signal 11|signal 6|signal 15|uncaught exception' -A 20
            """, raise_on_error=False)
        if result:
            # Logs may contain bytes that are not valid UTF-8; replace them
            # instead of letting UnicodeDecodeError abort the collection.
            all_results.append(result.decode('utf-8', errors='replace'))
    return self.process_lines(all_results)

def get_errors(self):
    """Print every unique trace with the number of raw traces grouped under it.

    Traces come from ``self.get_all_errors`` and are grouped by
    ``self.find_unique_traces_with_counts``; entries are printed in the
    order that mapping yields them, each followed by a 60-dash rule.
    """
    grouped = self.find_unique_traces_with_counts(self.get_all_errors())
    for signature, occurrences in grouped.items():
        print(f"Trace (Occurrences: {len(occurrences)}):\n{signature}\n{'-'*60}")

def perform_checks(self):

safety_violations = safety_warden_factory(self.kikimr_cluster, self.ssh_username).list_of_safety_violations()
Expand Down Expand Up @@ -193,6 +279,7 @@ def parse_args():
type=str,
nargs="+",
choices=[
"get_errors",
"cleanup",
"cleanup_logs",
"cleanup_dumps",
Expand Down Expand Up @@ -222,6 +309,8 @@ def main():
)

for action in args.actions:
if action == "get_errors":
stability_cluster.get_errors()
if action == "deploy_ydb":
stability_cluster.deploy_ydb()
if action == "cleanup":
Expand Down
Loading