From b37451d9797677a066b1464c8e831424ec7976c3 Mon Sep 17 00:00:00 2001
From: Andrii Povsten <voreychannel@gmail.com>
Date: Wed, 10 Jul 2024 14:32:08 +0200
Subject: [PATCH] Updated comments

---
 scripts/logs_lifetime_benchmarking_test.py | 61 ++++++++++++++++------
 1 file changed, 45 insertions(+), 16 deletions(-)
diff --git a/scripts/logs_lifetime_benchmarking_test.py b/scripts/logs_lifetime_benchmarking_test.py
index df4154ed..6b524ff6 100644
--- a/scripts/logs_lifetime_benchmarking_test.py
+++ b/scripts/logs_lifetime_benchmarking_test.py
@@ -1,9 +1,21 @@
 import pandas as pd
+import subprocess
 import matplotlib.pyplot as plt
 from matplotlib.dates import DateFormatter, SecondLocator
 import click
 
 """Run this script to generate the plots of current job status"""
+"""First compare the logs from the reana-client logs command with the job pod IDs from the statistics file."""
+
+def run_reana_client_logs(command):
+    """Run ``reana-client logs -w <command>``; ``command`` is the workflow name.
+    Returns the captured stdout, or None (after printing stderr) on failure."""
+    argv = ["reana-client", "logs", "-w", str(command)]  # list form: name is never shell-parsed
+    result = subprocess.run(argv, capture_output=True, text=True)
+    if result.returncode == 0:
+        return result.stdout
+    print(f"Command failed to run, error:\n{result.stderr}")
+    return None
 
 def parse_log_file(file_path):
     with open(file_path, 'r') as f:
@@ -33,39 +45,48 @@ def filter_jobs(sorted_data, status):
 def extract_running_timestamps(running_jobs):
     timestamps_running = []
     encountered_jobs_running = set()
+
     for line in running_jobs:
         parts = line.split()
         job_id = parts[0]
         if job_id in encountered_jobs_running:
             continue
+
         start_time = pd.to_datetime(parts[3])
         finish_time_str = parts[5].split(',')[0]
+
         if finish_time_str != '<none>':
             finish_time = pd.to_datetime(finish_time_str)
             timestamps_running.append((start_time, 1))
             timestamps_running.append((finish_time, -1))
             encountered_jobs_running.add(job_id)
+
     timestamps_running.sort()
     return timestamps_running
 
 def extract_pending_timestamps(pending_jobs):
     timestamps_pending = []
     encountered_jobs_pending = set()
+
     for line in pending_jobs:
         parts = line.split()
         job_id = parts[0]
         if job_id in encountered_jobs_pending:
             continue
+
         start_time_str = parts[2]
         if start_time_str == '<none>':
             continue
+
         start_time = pd.to_datetime(start_time_str)
         finish_time_str = parts[3].split(',')[0]
+
         if finish_time_str != '<none>':
             finish_time = pd.to_datetime(finish_time_str)
             timestamps_pending.append((start_time, 1))
             timestamps_pending.append((finish_time, -1))
             encountered_jobs_pending.add(job_id)
+
     timestamps_pending.sort()
     return timestamps_pending
 
@@ -80,21 +101,6 @@ def calculate_cumulative(timestamps):
     return x, y
  
 def plot_data(succeeded_counts, x_running, y_running, x_pending, y_pending):
-    """Run benchmarking tests. Generate matplotlib plot
-
-    The script requires matplotlib and pandas packages
-
-    Steps to run benchmarking workflow lifetime test:
-
-        .. code-block:: console
-
-        \b
-        #To run this script 
-        $ kubectl #To save a live logs 
-        $ cd reana/scripts
-        $ python lifetime.py logs.txt # insert your .txt file with logs
-
-    """
     plt.figure(figsize=figsize)
 
     # Plot succeeded jobs
@@ -121,7 +127,30 @@ def plot_data(succeeded_counts, x_running, y_running, x_pending, y_pending):
 @click.argument('file_path')
 @click.option('--title', default='Analysis Results', help='Title of the analysis results')
 @click.option('--figsize', nargs=2, type=float, default=(12, 8), help='Figure size as two float values')
-def main(file_path):
+@click.option('--workflow', required=False, help='Name of the REANA workflow the same as the processed .txt file')
+def main(file_path, title, figsize, workflow):
+    """Compare the reana-client logs with the jobs from the analysis results,
+       run the benchmarking tests and generate a matplotlib plot.
+
+       The script requires the matplotlib and pandas packages.
+
+       Steps to run the benchmarking workflow lifetime test:
+
+        .. code-block:: console
+
+        \b
+        # To run this script:
+        $ kubectl  # to save the live logs
+        $ cd reana/scripts
+        $ python logs_lifetime_benchmarking_test.py logs.txt  # insert your .txt file with logs
+
+    """
+    reana_logs = run_reana_client_logs(workflow)
+    reana_job_ids = set()
+    for line in reana_logs.splitlines():
+        if line.strip().startswith('reana-run-job'):
+            job_id  = line.strip().split()[0]
+            reana_job_ids.add(job_id)
     lines = parse_log_file(file_path)
     
     unique_jobs = extract_unique_jobs(lines)