diff --git a/episodes/06-expansion.md b/episodes/06-expansion.md index c6dba6a..1d1ec6d 100644 --- a/episodes/06-expansion.md +++ b/episodes/06-expansion.md @@ -119,7 +119,7 @@ curl -O https://ocaisa.github.io/hpc-workflows/files/plot_terse_amdahl_results.p The script `plot_terse_amdahl_results.py` needs a command line that looks like: ```bash -python plot_terse_amdahl_results.py <1st input file> <2nd input file> ... +python plot_terse_amdahl_results.py --output <output file> <1st input file> <2nd input file> ... ``` Let's introduce that into our `generate_run_files` rule: @@ -129,7 +129,7 @@ rule generate_run_files: output: "p_{parallel_proportion}_runs.txt" input: expand("p_{{parallel_proportion}}/runs/amdahl_run_{count}.json", count=NTASK_SIZES) shell: - "python plot_terse_amdahl_results.py {output} {input}" + "python plot_terse_amdahl_results.py --output {output} {input}" ``` ::: challenge @@ -146,7 +146,7 @@ rule generate_run_files: envmodules: "matplotlib" shell: - "python plot_terse_amdahl_results.py {output} {input}" + "python plot_terse_amdahl_results.py --output {output} {input}" ``` :::::: @@ -188,6 +188,7 @@ snakemake --profile cluster_profile/ p_0.8_scalability.jpg ::: ::: challenge + ## Bonus round Create a final rule that can be called directly and generates a scaling plot for diff --git a/episodes/files/plot_terse_amdahl_results.py b/episodes/files/plot_terse_amdahl_results.py old mode 100644 new mode 100755 index a85425f..fdb09bb --- a/episodes/files/plot_terse_amdahl_results.py +++ b/episodes/files/plot_terse_amdahl_results.py @@ -1,15 +1,34 @@ -import sys +#!/usr/bin/env python3 +import argparse import json +import matplotlib import matplotlib.pyplot as plt import numpy as np -def process_files(file_list, output="plot.jpg"): +matplotlib.use('AGG') + +description = """ +Plot results of an Amdahl scaling study, +assuming the '--terse' output flag was used. 
+""" + +def process_files(output, file_list): value_tuples=[] for filename in file_list: # Open the JSON file and load data - with open(filename, 'r') as file: - data = json.load(file) - value_tuples.append((data['nproc'], data['execution_time'])) + try: + with open(filename, 'r') as file: + data = json.load(file) + value_tuples.append((data['nproc'], data['execution_time'])) + except FileNotFoundError: + print(f"Error: File {filename} not found.") + return + except json.JSONDecodeError: + print(f"Error: File {filename} is not a valid JSON.") + return + except KeyError: + print(f"Error: Missing required data in file {filename}.") + return # Sort the tuples sorted_list = sorted(value_tuples) @@ -22,9 +41,10 @@ def process_files(file_list, output="plot.jpg"): # Adding the y=1/x line x_line = np.linspace(1, max(x), 100) # Create x values for the line - y_line = (y[0]/x[0]) / x_line # Calculate corresponding (scaled) y values + y_line = (y[0] / x[0]) / x_line # Calculate corresponding (scaled) y values - plt.plot(x_line, y_line, linestyle='--', color='red', label='Perfect scaling') + plt.plot(x_line, y_line, linestyle='--', + color='red', label='Perfect scaling') # Adding title and labels plt.title("Scaling plot") @@ -34,16 +54,27 @@ def process_files(file_list, output="plot.jpg"): # Show the legend plt.legend() - # Save the plot to a JPEG file - plt.savefig(output, format='jpeg') + # Save the plot to the specified file + plt.savefig(output, dpi=400, bbox_inches="tight") if __name__ == "__main__": - # The first command-line argument is the script name itself, so we skip it - output = sys.argv[1] - filenames = sys.argv[2:] + parser = argparse.ArgumentParser( + description=description, + epilog="Brought to you by HPC Carpentry" + ) + + parser.add_argument( + "--output", + help="Image file to write (PNG or JPG)", + required=True + ) + + parser.add_argument( + "inputs", + help="Amdahl terse output files (JSON)", + nargs="+" + ) - if filenames: - process_files(filenames, 
output=output) - else: - print("No files provided.") + args = parser.parse_args() + process_files(args.output, args.inputs)