From 7d31d1d7ec0038689fc011e172d14540f1d8ab62 Mon Sep 17 00:00:00 2001 From: jon-funk Date: Mon, 4 Mar 2024 18:35:00 -0800 Subject: [PATCH] add code peek to app troubleshoot codebundle (#344) --- .../k8s-app-troubleshoot/runbook.robot | 18 ++++----- .../RW/K8sApplications/k8s_applications.py | 21 ++++++++-- libraries/RW/K8sApplications/parsers.py | 40 +++++++++++++++++++ libraries/RW/K8sApplications/repository.py | 17 ++++++++ 4 files changed, 83 insertions(+), 13 deletions(-) diff --git a/codebundles/k8s-app-troubleshoot/runbook.robot b/codebundles/k8s-app-troubleshoot/runbook.robot index 06ec9d48..5fb47fff 100644 --- a/codebundles/k8s-app-troubleshoot/runbook.robot +++ b/codebundles/k8s-app-troubleshoot/runbook.robot @@ -66,13 +66,13 @@ Troubleshoot `${CONTAINER_NAME}` Application Logs ... render_in_commandlist=true ... env=${env} ... secret_file__kubeconfig=${kubeconfig} - ${printenv}= RW.CLI.Run Cli - ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} exec $(${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} get all -l ${LABELS} -oname | grep -iE "deploy|stateful" | head -n 1) --container=${CONTAINER_NAME} -- printenv - ... show_in_rwl_cheatsheet=true - ... render_in_commandlist=true - ... include_in_history=False - ... env=${env} - ... secret_file__kubeconfig=${kubeconfig} + # ${printenv}= RW.CLI.Run Cli + # ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} exec $(${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} get all -l ${LABELS} -oname | grep -iE "deploy|stateful" | head -n 1) --container=${CONTAINER_NAME} -- printenv + # ... show_in_rwl_cheatsheet=true + # ... render_in_commandlist=true + # ... include_in_history=False + # ... env=${env} + # ... secret_file__kubeconfig=${kubeconfig} ${proc_list}= RW.CLI.Run Cli ... cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} exec $(${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} get all -l ${LABELS} -oname | grep -iE "deploy|stateful" | head -n 1) --container=${CONTAINER_NAME} -- ps -eo command --no-header | grep -v "ps -eo" ... show_in_rwl_cheatsheet=true @@ -83,14 +83,14 @@ Troubleshoot `${CONTAINER_NAME}` Application Logs ${app_repo}= RW.K8sApplications.Clone Repo ${REPO_URI} ${REPO_AUTH_TOKEN} ${NUM_OF_COMMITS} # ${test_data}= RW.K8sApplications.Get Test Data ${proc_list}= RW.K8sApplications.Format Process List ${proc_list.stdout} - ${serialized_env}= RW.K8sApplications.Serialize env ${printenv.stdout} + # ${serialized_env}= RW.K8sApplications.Serialize env ${printenv.stdout} ${parsed_exceptions}= RW.K8sApplications.Parse Exceptions ${logs.stdout} # ${parsed_exceptions}= RW.K8sApplications.Parse Exceptions ${test_data} ${repos}= Create List ${app_repo} ${ts_results}= RW.K8sApplications.Troubleshoot Application ... repos=${repos} ... exceptions=${parsed_exceptions} - ... env=${serialized_env} + # ... env=${serialized_env} ... process_list=${proc_list} ... app_name=${CONTAINER_NAME} ${history}= RW.CLI.Pop Shell History diff --git a/libraries/RW/K8sApplications/k8s_applications.py b/libraries/RW/K8sApplications/k8s_applications.py index 0e3de858..8a0592e1 100644 --- a/libraries/RW/K8sApplications/k8s_applications.py +++ b/libraries/RW/K8sApplications/k8s_applications.py @@ -126,6 +126,7 @@ def troubleshoot_application( search_words: list[str] = [] exception_occurences: dict = {} most_common_exception: str = "" + most_common_file_peek: str = "" errors_summary: str = "" report: str = "" for repo in repos: @@ -141,10 +142,10 @@ def troubleshoot_application( # we hash the exception strings to shorten them for dict searches hashed_exception = _hash_string_md5(excep.raw) if hashed_exception not in exception_occurences: + # TODO: clean this up to use dataclass exception_occurences[hashed_exception] = { "count": 1, - "content": excep.raw, - "errors_summary": excep.errors_summary, + "exception": excep, } elif hashed_exception in exception_occurences: exception_occurences[hashed_exception]["count"] += 1 @@ -171,9 +172,14 @@ def troubleshoot_application( for hashed_exception in exception_occurences: count = exception_occurences[hashed_exception]["count"] if count > max_count: + excep = exception_occurences[hashed_exception]["exception"] + repo_file = repos[0].find_file(excep.first_file) + if excep.first_line_nums: + logger.info(f"line nums: {excep.first_line_nums}") + most_common_file_peek = repo_file.content_peek(excep.first_line_nums[0]) max_count = count - most_common_exception = exception_occurences[hashed_exception]["content"] - errors_summary = exception_occurences[hashed_exception]["errors_summary"] + most_common_exception = excep.raw + errors_summary = excep.errors_summary err_msg_line = f"There are some error(s) with the {app_name} application: {errors_summary}\nThis was the most common exception found:" if not errors_summary: err_msg_line = f"The following exception was found while parsing the application logs of {app_name}" @@ -191,6 +197,10 @@ def troubleshoot_application( ``` {most_common_exception} ``` +Near this code: +``` +{most_common_file_peek} +``` """ if most_common_exception else "No common exceptions could be parsed. Try running the log command provided." @@ -217,6 +227,9 @@ def troubleshoot_application( "found_exceptions": (True if most_common_exception else False), } +def get_file_contents_peek(filename: str, st: StackTraceData) -> str: + return + def _get_workspace_url(): workspace: str = os.getenv("RW_WORKSPACE", "") base_url: str = os.getenv("RW_FRONTEND_URL", "") diff --git a/libraries/RW/K8sApplications/parsers.py b/libraries/RW/K8sApplications/parsers.py index 06ed87f1..fc6d1ab3 100644 --- a/libraries/RW/K8sApplications/parsers.py +++ b/libraries/RW/K8sApplications/parsers.py @@ -10,6 +10,7 @@ class StackTraceData: # similar to urls, except just the API endpoints if found endpoints: list[str] files: list[str] + line_nums: dict[str, list[int]] # line numbers associated with exceptions per file error_messages: list[str] raw: str = field(default="", repr=False) # TODO: create a in-mem db of exception types @@ -30,6 +31,20 @@ def has_results(self): @property def errors_summary(self) -> str: return ", ".join(self.error_messages) + + @property + def first_file(self) -> str: + if len(self.files) > 0: + return self.files[0] + else: + return "" + + @property + def first_line_nums(self) -> list[int]: + if len(self.line_nums.keys()) > 0: + return list(self.line_nums.values())[0] + else: + return [] class BaseStackTraceParse: @@ -57,6 +72,7 @@ def is_json(data: str) -> bool: @staticmethod def parse_log(log) -> StackTraceData: file_paths: list[str] = BaseStackTraceParse.extract_files(log) + line_nums: dict[str,list[int]] = BaseStackTraceParse.extract_line_nums(log) urls: list[str] = BaseStackTraceParse.extract_urls(log) endpoints: list[str] = BaseStackTraceParse.extract_endpoints(log) error_messages: list[str] = BaseStackTraceParse.extract_sentences(log) @@ -64,10 +80,34 @@ def parse_log(log) -> StackTraceData: urls=urls, endpoints=endpoints, files=file_paths, + line_nums=line_nums, error_messages=error_messages, raw=log, ) return st_data + + @staticmethod + def extract_line_nums(text, exclude_paths: list[str] = None) -> dict[str,list[int]]: + if exclude_paths is None: + exclude_paths = BaseStackTraceParse.exclude_file_paths + results = {} + regex = r"/[\w./_-]+\.[a-zA-Z0-9]+" + matches = re.findall(regex, text) + matches = [ + m + for m in matches + if not any(exclude_path in m for exclude_path in exclude_paths) + ] + for m in matches: + if m not in results.keys(): + results[m] = [] + regex = r"line (\d+)" + line_nums = re.findall(regex, text) + for line_num in line_nums: + if line_num not in results[m]: + results[m].append(int(line_num)) + return results + @staticmethod def extract_files(text, exclude_paths: list[str] = None) -> list[str]: diff --git a/libraries/RW/K8sApplications/repository.py b/libraries/RW/K8sApplications/repository.py index 6dc4b0d1..649bbb37 100644 --- a/libraries/RW/K8sApplications/repository.py +++ b/libraries/RW/K8sApplications/repository.py @@ -136,6 +136,14 @@ def search(self, search_term: str) -> RepositorySearchResult: # skip_regex = r"^[ \t\n\r#*,(){}\[\]\"\'\':]*$" return None + def content_peek(self, line_num: int, before: int=44, after: int=6) -> str: + lines = self.content.splitlines() + start = max(0, line_num - before - 1) + end = min(len(lines), line_num + after) + peek_lines = lines[start:end] + result = '\n'.join(peek_lines) + return result + def git_add(self): try: add_stdout = subprocess.run( @@ -228,6 +236,15 @@ def __init__( self.auth_token = None self.files = RepositoryFiles() self.branch = branch + + def find_file(self, filename: str) -> RepositoryFile: + if not filename: + return None + for fn, obj in self.files.files.items(): + if obj.basename == filename: + return obj + elif obj.basename == os.path.basename(filename): + return obj def clone_repo( self,