From 7d31d1d7ec0038689fc011e172d14540f1d8ab62 Mon Sep 17 00:00:00 2001
From: jon-funk <jonathan.funk@runwhen.com>
Date: Mon, 4 Mar 2024 18:35:00 -0800
Subject: [PATCH] add code peek to app troubleshoot codebundle (#344)

---
 .../k8s-app-troubleshoot/runbook.robot        | 18 ++++-----
 .../RW/K8sApplications/k8s_applications.py    | 21 ++++++++--
 libraries/RW/K8sApplications/parsers.py       | 40 +++++++++++++++++++
 libraries/RW/K8sApplications/repository.py    | 17 ++++++++
 4 files changed, 83 insertions(+), 13 deletions(-)

diff --git a/codebundles/k8s-app-troubleshoot/runbook.robot b/codebundles/k8s-app-troubleshoot/runbook.robot
index 06ec9d48..5fb47fff 100644
--- a/codebundles/k8s-app-troubleshoot/runbook.robot
+++ b/codebundles/k8s-app-troubleshoot/runbook.robot
@@ -66,13 +66,13 @@ Troubleshoot `${CONTAINER_NAME}` Application Logs
     ...    render_in_commandlist=true
     ...    env=${env}
     ...    secret_file__kubeconfig=${kubeconfig}
-    ${printenv}=    RW.CLI.Run Cli
-    ...    cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} exec $(${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} get all -l ${LABELS} -oname | grep -iE "deploy|stateful" | head -n 1) --container=${CONTAINER_NAME} -- printenv
-    ...    show_in_rwl_cheatsheet=true
-    ...    render_in_commandlist=true
-    ...    include_in_history=False
-    ...    env=${env}
-    ...    secret_file__kubeconfig=${kubeconfig}
+    # ${printenv}=    RW.CLI.Run Cli
+    # ...    cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} exec $(${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} get all -l ${LABELS} -oname | grep -iE "deploy|stateful" | head -n 1) --container=${CONTAINER_NAME} -- printenv
+    # ...    show_in_rwl_cheatsheet=true
+    # ...    render_in_commandlist=true
+    # ...    include_in_history=False
+    # ...    env=${env}
+    # ...    secret_file__kubeconfig=${kubeconfig}
     ${proc_list}=    RW.CLI.Run Cli
     ...    cmd=${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} exec $(${KUBERNETES_DISTRIBUTION_BINARY} --context=${CONTEXT} -n ${NAMESPACE} get all -l ${LABELS} -oname | grep -iE "deploy|stateful" | head -n 1) --container=${CONTAINER_NAME} -- ps -eo command --no-header | grep -v "ps -eo"
     ...    show_in_rwl_cheatsheet=true
@@ -83,14 +83,14 @@ Troubleshoot `${CONTAINER_NAME}` Application Logs
     ${app_repo}=    RW.K8sApplications.Clone Repo    ${REPO_URI}    ${REPO_AUTH_TOKEN}    ${NUM_OF_COMMITS}
     # ${test_data}=    RW.K8sApplications.Get Test Data
     ${proc_list}=    RW.K8sApplications.Format Process List    ${proc_list.stdout}
-    ${serialized_env}=    RW.K8sApplications.Serialize env    ${printenv.stdout}
+    # ${serialized_env}=    RW.K8sApplications.Serialize env    ${printenv.stdout}
     ${parsed_exceptions}=    RW.K8sApplications.Parse Exceptions    ${logs.stdout}
     # ${parsed_exceptions}=    RW.K8sApplications.Parse Exceptions    ${test_data}
     ${repos}=    Create List    ${app_repo}
     ${ts_results}=    RW.K8sApplications.Troubleshoot Application
     ...    repos=${repos}
     ...    exceptions=${parsed_exceptions}
-    ...    env=${serialized_env}
+    # ...    env=${serialized_env}
     ...    process_list=${proc_list}
     ...    app_name=${CONTAINER_NAME}
     ${history}=    RW.CLI.Pop Shell History
diff --git a/libraries/RW/K8sApplications/k8s_applications.py b/libraries/RW/K8sApplications/k8s_applications.py
index 0e3de858..8a0592e1 100644
--- a/libraries/RW/K8sApplications/k8s_applications.py
+++ b/libraries/RW/K8sApplications/k8s_applications.py
@@ -126,6 +126,7 @@ def troubleshoot_application(
     search_words: list[str] = []
     exception_occurences: dict = {}
     most_common_exception: str = ""
+    most_common_file_peek: str = ""
     errors_summary: str = ""
     report: str = ""
     for repo in repos:
@@ -141,10 +142,10 @@ def troubleshoot_application(
                 # we hash the exception strings to shorten them for dict searches
                 hashed_exception = _hash_string_md5(excep.raw)
                 if hashed_exception not in exception_occurences:
+                    # TODO: clean this up to use dataclass
                     exception_occurences[hashed_exception] = {
                         "count": 1,
-                        "content": excep.raw,
-                        "errors_summary": excep.errors_summary,
+                        "exception": excep,
                     }
                 elif hashed_exception in exception_occurences:
                     exception_occurences[hashed_exception]["count"] += 1
@@ -171,9 +172,14 @@ def troubleshoot_application(
     for hashed_exception in exception_occurences:
         count = exception_occurences[hashed_exception]["count"]
         if count > max_count:
+            excep = exception_occurences[hashed_exception]["exception"]
+            repo_file = repos[0].find_file(excep.first_file)  # None when the file is not in the repo
+            logger.info(f"line nums: {excep.first_line_nums}")
+            can_peek = repo_file is not None and bool(excep.first_line_nums)  # also resets a stale peek below
+            most_common_file_peek = repo_file.content_peek(excep.first_line_nums[0]) if can_peek else ""
             max_count = count
-            most_common_exception = exception_occurences[hashed_exception]["content"]
-            errors_summary = exception_occurences[hashed_exception]["errors_summary"]
+            most_common_exception = excep.raw
+            errors_summary = excep.errors_summary
     err_msg_line = f"There are some error(s) with the {app_name} application: {errors_summary}\nThis was the most common exception found:"
     if not errors_summary:
         err_msg_line = f"The following exception was found while parsing the application logs of {app_name}"
@@ -191,6 +197,10 @@ def troubleshoot_application(
 ```
 {most_common_exception}
 ```
+Near this code:
+```
+{most_common_file_peek}
+```
 """
         if most_common_exception
         else "No common exceptions could be parsed. Try running the log command provided."
@@ -217,6 +227,9 @@ def troubleshoot_application(
         "found_exceptions": (True if most_common_exception else False),
     }
 
+def get_file_contents_peek(filename: str, st: StackTraceData) -> str:
+    return  # NOTE(review): unimplemented stub - returns None despite the `-> str` annotation; implement or remove
+
 def _get_workspace_url():
     workspace: str = os.getenv("RW_WORKSPACE", "")
     base_url: str = os.getenv("RW_FRONTEND_URL", "")
diff --git a/libraries/RW/K8sApplications/parsers.py b/libraries/RW/K8sApplications/parsers.py
index 06ed87f1..fc6d1ab3 100644
--- a/libraries/RW/K8sApplications/parsers.py
+++ b/libraries/RW/K8sApplications/parsers.py
@@ -10,6 +10,7 @@ class StackTraceData:
     # similar to urls, except just the API endpoints if found
     endpoints: list[str]
     files: list[str]
+    line_nums: dict[str, list[int]] # line numbers associated with exceptions per file
     error_messages: list[str]
     raw: str = field(default="", repr=False)
     # TODO: create a in-mem db of exception types
@@ -30,6 +31,20 @@ def has_results(self):
     @property
     def errors_summary(self) -> str:
         return ", ".join(self.error_messages)
+    
+    @property
+    def first_file(self) -> str:
+        """Return the first entry of ``files``, or ``""`` when ``files`` is empty."""
+        if len(self.files) == 0:
+            return ""
+        return self.files[0]
+    
+    @property
+    def first_line_nums(self) -> list[int]:
+        """Return the list stored under the first key of ``line_nums``, or ``[]`` when it has no keys."""
+        if len(self.line_nums.keys()) == 0:
+            return []
+        return next(iter(self.line_nums.values()))
 
 
 class BaseStackTraceParse:
@@ -57,6 +72,7 @@ def is_json(data: str) -> bool:
     @staticmethod
     def parse_log(log) -> StackTraceData:
         file_paths: list[str] = BaseStackTraceParse.extract_files(log)
+        line_nums: dict[str,list[int]] = BaseStackTraceParse.extract_line_nums(log)
         urls: list[str] = BaseStackTraceParse.extract_urls(log)
         endpoints: list[str] = BaseStackTraceParse.extract_endpoints(log)
         error_messages: list[str] = BaseStackTraceParse.extract_sentences(log)
@@ -64,10 +80,34 @@ def parse_log(log) -> StackTraceData:
             urls=urls,
             endpoints=endpoints,
             files=file_paths,
+            line_nums=line_nums,
             error_messages=error_messages,
             raw=log,
         )
         return st_data
+    
+    @staticmethod
+    def extract_line_nums(text, exclude_paths: list[str] = None) -> dict[str,list[int]]:
+        """Map each non-excluded file path in ``text`` to the unique ``line N`` numbers found in ``text``.
+
+        Line numbers are gathered from the whole text rather than correlated with individual
+        paths, so every returned path carries the same numbers (in first-seen order).
+        ``exclude_paths`` defaults to ``BaseStackTraceParse.exclude_file_paths``; any path
+        containing one of its entries as a substring is skipped.
+        """
+        if exclude_paths is None:
+            exclude_paths = BaseStackTraceParse.exclude_file_paths
+        # Convert before de-duplicating: comparing the regex's str captures against the stored
+        # ints never matched, so repeated numbers (and repeated paths) produced duplicate entries.
+        line_nums = list(dict.fromkeys(int(num) for num in re.findall(r"line (\d+)", text)))
+        results: dict[str, list[int]] = {}
+        for path in re.findall(r"/[\w./_-]+\.[a-zA-Z0-9]+", text):
+            if any(exclude_path in path for exclude_path in exclude_paths):
+                continue
+            if path not in results:
+                results[path] = list(line_nums)  # separate list per path, as before
+        return results
+
 
     @staticmethod
     def extract_files(text, exclude_paths: list[str] = None) -> list[str]:
diff --git a/libraries/RW/K8sApplications/repository.py b/libraries/RW/K8sApplications/repository.py
index 6dc4b0d1..649bbb37 100644
--- a/libraries/RW/K8sApplications/repository.py
+++ b/libraries/RW/K8sApplications/repository.py
@@ -136,6 +136,14 @@ def search(self, search_term: str) -> RepositorySearchResult:
         # skip_regex = r"^[ \t\n\r#*,(){}\[\]\"\'\':]*$"
         return None
 
+    def content_peek(self, line_num: int, before: int=44, after: int=6) -> str:
+        """Return the lines of ``self.content`` from ``before`` lines above 1-based ``line_num`` to ``after`` lines below it."""
+        all_lines = self.content.splitlines()
+        # slice bounds are 0-based and clamped to the available lines
+        first = max(0, line_num - before - 1)
+        last = min(len(all_lines), line_num + after)
+        return '\n'.join(all_lines[first:last])
+
     def git_add(self):
         try:
             add_stdout = subprocess.run(
@@ -228,6 +236,15 @@ def __init__(
             self.auth_token = None
         self.files = RepositoryFiles()
         self.branch = branch
+    
+    def find_file(self, filename: str) -> RepositoryFile:
+        """Return the first file in ``self.files.files`` whose basename equals ``filename`` or its basename, else None."""
+        if not filename:
+            return None
+        for repo_file in self.files.files.values():
+            name = repo_file.basename
+            if name == filename or name == os.path.basename(filename):
+                return repo_file
 
     def clone_repo(
         self,