From 645fff58d3565075e44919e09edefd4fbbc0771f Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 13 Feb 2014 13:47:11 -0700 Subject: [PATCH 01/24] Modify the command to accept the parameters needed for cluster environment support --- scaling/commands/bench_suite_maker.py | 96 +++++++++++++++++++-------- 1 file changed, 67 insertions(+), 29 deletions(-) diff --git a/scaling/commands/bench_suite_maker.py b/scaling/commands/bench_suite_maker.py index 1172b91..7af6926 100644 --- a/scaling/commands/bench_suite_maker.py +++ b/scaling/commands/bench_suite_maker.py @@ -11,70 +11,108 @@ __status__ = "Development" from pyqi.core.command import (Command, CommandIn, CommandOut, - ParameterCollection) + ParameterCollection) from pyqi.core.exception import CommandError from scaling.make_bench_suite import (make_bench_suite_files, - make_bench_suite_parameters) + make_bench_suite_parameters) + class BenchSuiteMaker(Command): BriefDescription = "Generates a benchmark suite file" LongDescription = ("Given a command and a list of benchmarks files or a " - "dictionary with the options to test, this command generates a shell " - "script that executes a complete benchmark suite.") + "dictionary with the options to test, this command " + "generates a shell script that executes a complete " + "benchmark suite.") CommandIns = ParameterCollection([ CommandIn(Name='command', DataType=str, - Description='command to benchmark', Required=True), + Description='command to benchmark', Required=True), CommandIn(Name='parameters', DataType=dict, - Description='dictionary where the keys are the parameters to test ' - 'and the values are a list of values for such parameter.', - DefaultDescription='No parameters used', Default=None), + Description='dictionary where the keys are the parameters ' + 'to test and the values are a list of values for such ' + 'parameter.', + DefaultDescription='No parameters used', Default=None), CommandIn(Name='bench_files', DataType=list, - Description='List of lists of paths to the benchmark files to use ' - 'as input for the command. Each inner list is a test case and ' - 'should have the same length as the in_opts parameter.', - DefaultDescription='No bench_files used', - Required=False, Default=None), + Description='List of lists of paths to the benchmark files ' + 'to use as input for the command. Each inner list is a test ' + 'case and should have the same length as the in_opts ' + 'parameter.', + DefaultDescription='No bench_files used', + Required=False, Default=None), CommandIn(Name='in_opts', DataType=list, - Description='list of options used for providing the benchmark files' - ' to the command. It should have the same length and order than the' - ' inner lists of bench_files.', - DefaultDescription='["-i"] is used as a default', - Required=False, Default=["-i"]), + Description='list of options used for providing the ' + 'benchmark files to the command. 
It should have the same '
+                  'length and order as the inner lists of bench_files.',
+                  DefaultDescription='["-i"] is used as a default',
+                  Required=False, Default=["-i"]),
     CommandIn(Name='out_opt', DataType=str,
-        Description='Option used for providing the output path to the '
-        'command to benchmark.',
-        DefaultDescription='"-o" is used as default',
-        Required=False, Default="-o")
+              Description='Option used for providing the output path to '
+              'the command to benchmark.',
+              DefaultDescription='"-o" is used as default',
+              Required=False, Default="-o"),
+    CommandIn(Name='pbs', DataType=bool,
+              Description='Flag to determine if the benchmark suite will '
+              'run in a PBS cluster environment',
+              DefaultDescription='False: run serially in bash',
+              Required=False, Default=False),
+    CoomandIn(Name='job_prefix', DataType=str,
+              Description='Prefix for the job name in case of a PBS '
+              'cluster environment',
+              DefaultDescription='"bench_" is used as a default prefix',
+              Required=False, Default="bench_"),
+    CommandIn(Name='queue', DataType=str,
+              Description='PBS queue to submit jobs',
+              DefaultDescription='"" is used as default, which will submit'
+              ' the jobs to the system default queue',
+              Required=False, Default=""),
+    CommandIn(Name='pbs_extra_args', DataType=str,
+              Description='Any extra arguments needed to qsub',
+              DefaultDescription='No extra arguments are used',
+              Required=False, Default=None)
     ])

     CommandOuts = ParameterCollection([
         CommandOut(Name='bench_suite', DataType=str,
-            Description='String with the benchmark suite')])
+                   Description='String with the benchmark suite')])

     def run(self, **kwargs):
         result = {}
+        # Get command parameters
         command = kwargs['command']
         out_opt = kwargs['out_opt']
         parameters = kwargs['parameters']
         bench_files = kwargs['bench_files']
         in_opts = kwargs['in_opts']
+        pbs = kwargs['pbs']
+        job_prefix = kwargs['job_prefix']
+        queue = kwargs['queue']
+        pbs_extra_args = kwargs['pbs_extra_args']
+
+        # Check which type of bench suite we are generating
         if parameters:
+            # We are generating a benchmark suite based on different parameter
+            # values. In that case, the user should not provide any bench files
             if bench_files:
                 raise CommandError("Parameters or bench_files should be "
-                    "provided, but not both.")
+                                   "provided, but not both.")
             bench_str = make_bench_suite_parameters(command, parameters,
-                out_opt)
+                                                    out_opt, pbs, job_prefix,
+                                                    queue, pbs_extra_args)
         elif bench_files:
+            # We are generating a benchmark suite based on input files.
+            # Check that the number of benchmark files for each test case
+            # matches the number of options used to provide the input files
             if not all(len(x) == len(in_opts) for x in bench_files):
-                raise CommandError("The length of bench_files and in_opts must "
-                    "be the same.")
+                raise CommandError("The length of bench_files and in_opts "
+                                   "must be the same.")
             bench_str = make_bench_suite_files(command, in_opts, bench_files,
-                out_opt)
+                                               out_opt, pbs, job_prefix, queue,
+                                               pbs_extra_args)
         else:
+            # Not enough parameters!
raise CommandError("Must specify parameters or bench_files.") result['bench_suite'] = bench_str return result -CommandConstructor = BenchSuiteMaker \ No newline at end of file +CommandConstructor = BenchSuiteMaker From 8cc9938273089bbf2c668afe91aab30acabd49cf Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 13 Feb 2014 17:21:10 -0700 Subject: [PATCH 02/24] Adding cluster utils --- scaling/cluter_util.py | 57 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 scaling/cluter_util.py diff --git a/scaling/cluter_util.py b/scaling/cluter_util.py new file mode 100644 index 0000000..22dfaa5 --- /dev/null +++ b/scaling/cluter_util.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python + +__author__ = "Jose Antonio Navas Molina" +__copyright__ = "Copyright 2014, The QIIME-Scaling Project" +__credits__ = ["Jose Antonio Navas Molina", "Daniel McDonald"] +__license__ = "BSD" +__version__ = "0.0.1-dev" +__maintainer__ = "Jose Antonio Navas Molina" +__email__ = "josenavasmolina@gmail.com" +__status__ = "Development" + +import subprocess +import os +from time import sleep + + +def check_status(jobs_to_monitor): + """Check the status of the passed list of jobs + + Inputs: + jobs_to_monitor: list of job ids + + Returns: + A subset of jobs_to_monitor containing those jobs that are still + running + """ + # Get all the commands running pf the current user + user = os.environ['USER'] + qstat_cmd = "qstat | grep %s" % user + proc = subprocess.Popen(qstat_cmd, ) + (stdout, stderr) = proc.communicate() + # Parse the qstat output + lines = stdout.splitlines() + running_jobs = [] + for l in lines: + job_id, job_name, user, time, status, queue = l.split() + # Check if this job is one of the jobs that we have to + # monitor and check if it is running or queued + if job_id in jobs_to_monitor and status in ['R', 'Q']: + running_jobs.append() + # Return the list with the running jobs that we're still waiting for + return running_jobs + + +def wait_on(jobs_to_monitor, poll_interval=5): + """Block while jobs to monitor are running + + Inputs: + jobs_to_monitor: list of job ids + poll_interval: interval between checks, in seconds + """ + # Loop until there is some job to monitor + while jobs_to_monitor: + # Sleep before new job status check + sleep(poll_interval) + # Check job status and get new set of jobs to wait on + jobs_to_monitor = check_status(jobs_to_monitor) From 6f891ffa902ccac82fd2fcc21a87ed113668002f Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 13 Feb 2014 17:33:32 -0700 Subject: [PATCH 03/24] Adding PBS submission support to the bench suite --- scaling/commands/bench_suite_maker.py | 2 +- scaling/make_bench_suite.py | 78 +++++++++++++++++++-------- 2 files changed, 58 insertions(+), 22 deletions(-) diff --git a/scaling/commands/bench_suite_maker.py b/scaling/commands/bench_suite_maker.py index 7af6926..9f9d892 100644 --- a/scaling/commands/bench_suite_maker.py +++ b/scaling/commands/bench_suite_maker.py @@ -54,7 +54,7 @@ class BenchSuiteMaker(Command): 'run in a PBS cluster environment', DefaultDescription='False: run serially in bash', Required=False, Default=False), - CoomandIn(Name='job_prefix', DataType=str, + CommandIn(Name='job_prefix', DataType=str, Description='Prefix for the job name in case of a PBS ' 'cluster environment', DefaultDescription='"bench_" is used as a default prefix', diff --git a/scaling/make_bench_suite.py b/scaling/make_bench_suite.py index ccc9db0..9a86d86 100644 --- a/scaling/make_bench_suite.py +++ b/scaling/make_bench_suite.py @@ 
-45,10 +45,17 @@ MKDIR_TIMING_CMD = "mkdir $timing_dest/%s\n" # The command template follows this structure -# timing_wrapper.sh -COMMAND_TEMPLATE = """ timing_wrapper.sh $timing_dest/%s/$i.txt %s %s %s $output_dest/%s/$i""" +# timing_wrapper.sh +# +COMMAND_TEMPLATE = (" timing_wrapper.sh $timing_dest/%s/$i.txt %s %s %s " + "$output_dest/%s/$i") -# The bash loop used to execute the commands as many times as provided by the user +# The PBS template follows this structure +# echo "cd $PWD; " | qsub -k oe -N -q +PBS_TEMPLATE = (" echo \"cd $PWD; %s\" | qsub -k oe -N %s -q %s %s") + +# The bash loop used to execute the commands as many times as +# provided by the user FOR_LOOP = """# Loop as many times as desired for i in `seq $num_rep` do @@ -60,7 +67,9 @@ """ # Bash command to collapse the results and generate the scaling plots -GET_RESULTS = """scaling process-bench-results -i $timing_dest/%s -o $dest/plots/%s\n""" +GET_RESULTS = ("scaling process-bench-results -i $timing_dest/%s -o " + "$dest/plots/%s\n") + def get_command_string(command, base_name, opts, values, out_opt): """Generates the bash string with the benchmark command @@ -76,12 +85,12 @@ def get_command_string(command, base_name, opts, values, out_opt): Note: raises a ValueError if the number of options and the number of values provided does not match - The opts list and the values list are paired in the same order, i.e. opts[0] - with values[0], opts[1] with values[1] and so on. + The opts list and the values list are paired in the same order, i.e. + opts[0] with values[0], opts[1] with values[1] and so on. """ if len(opts) != len(values): raise ValueError("The number of options and the number of values " - "provided must be the same") + "provided must be the same") # Get the string with the input options and their values in_opts = [] for opt, val in zip(opts, values): @@ -90,14 +99,16 @@ def get_command_string(command, base_name, opts, values, out_opt): options_str = " ".join(in_opts) return COMMAND_TEMPLATE % (base_name, command, options_str, - out_opt, base_name) + out_opt, base_name) + -def make_bench_suite_files(command, in_opts, bench_files, out_opt): +def make_bench_suite_files(command, in_opts, bench_files, out_opt, pbs=False, + job_prefix="bench_", queue="", pbs_extra_args=""): """Generates a string with the bash commands to execute the benchmark suite Inputs: command: string with the base command to execute - in_opts: list with the options used to provide the input files to the + in_opts: list with the options used to provide the input files to the command bench_files: list of lists with the input files for each bench case e.g. 
[ ["option1_file1","option2_file1"], @@ -105,6 +116,12 @@ def make_bench_suite_files(command, in_opts, bench_files, out_opt): ["option1_file3","option2_file3"] ] out_opt: string with the option used to indicate the output path to the command + pbs: flag to determine if the benchmark suite will run in a PBS + cluster environment + job_prefix: prefix for the job name in case of a PBS cluster + environment + queue: PBS queue to submit jobs + pbs_extra_args: any extra arguments needed to qsub """ # Initialize the result string list with the bash header # Get the base name of the command @@ -113,26 +130,34 @@ def make_bench_suite_files(command, in_opts, bench_files, out_opt): # Iterate over all the benchmark files commands = [] for bfs in bench_files: - # Add the command to create the directory to store the results of the + # Add the command to create the directory to store the results of the # benchmark suite bf = bfs[0] base_name = splitext(basename(bf))[0] result.append(MKDIR_OUTPUT_CMD % base_name) result.append(MKDIR_TIMING_CMD % base_name) # Get the string of the command to be executed - commands.append( get_command_string(command, base_name, in_opts, - bfs, out_opt) ) + commands.append(get_command_string(command, base_name, in_opts, + bfs, out_opt)) + if pbs: + # We are creating a benchmark suite in a cluster environment + # Add the qsub command for each job + commands = [PBS_TEMPLATE % (cmd, job_prefix, queue, pbs_extra_args) + for cmd in commands] # Insert the command in the bash for loop and # append these lines to the result string - result.append( FOR_LOOP % ("\n".join(commands)) ) + result.append(FOR_LOOP % ("\n".join(commands))) # Append to the results string the command to get the results and # generate the benchmark plots - result.append(GET_RESULTS % ("","")) + result.append(GET_RESULTS % ("", "")) return "".join(result) -def make_bench_suite_parameters(command, parameters, out_opt): + +def make_bench_suite_parameters(command, parameters, out_opt, pbs=False, + job_prefix="bench_", queue="", + pbs_extra_args=""): """Generates a string with the bash commands to execute the benchmark suite - + Inputs: command: string with the command to execute parameters: dictionary with the parameter values to test, keyed by @@ -141,6 +166,12 @@ def make_bench_suite_parameters(command, parameters, out_opt): 'param2' : ["val1", "val2", "val3"]} out_opt: string with the option used to indicate the output path to the command + pbs: flag to determine if the benchmark suite will run in a PBS + cluster environment + job_prefix: prefix for the job name in case of a PBS cluster + environment + queue: PBS queue to submit jobs + pbs_extra_args: any extra arguments needed to qsub """ # Initialize the result string list with the bash header # Get the base name of the command @@ -164,13 +195,18 @@ def make_bench_suite_parameters(command, parameters, out_opt): result.append(MKDIR_TIMING_CMD % param_dir) # Get the string of the command to be executed param_str = "--" + param - commands.append( get_command_string(command, param_dir, [param_str], - [val], out_opt) ) + commands.append(get_command_string(command, param_dir, [param_str], + [val], out_opt)) + if pbs: + # We are creating a benchmark suite in a cluster environment + # Add the qsub command for each job + commands = [PBS_TEMPLATE % (cmd, job_prefix, queue, pbs_extra_args) + for cmd in commands] # Insert the commands in the bash for loop and # append these lines to the result string - result.append( FOR_LOOP % ("\n".join(commands)) ) + 
result.append(FOR_LOOP % ("\n".join(commands)))
     # Append the result string for each parameter to get the
     # results and generate the benchmark plots
     result.append("mkdir $dest/plots\n")
     result.extend(get_results_list)
-    return "".join(result)
\ No newline at end of file
+    return "".join(result)

From db36ccaabf8bcddb65e357c6546642bff4a87489 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Thu, 13 Feb 2014 17:55:19 -0700
Subject: [PATCH 04/24] Modify optparse interface to accept the new parameters
 for PBS support

---
 scaling/commands/bench_suite_maker.py         |   2 +-
 .../optparse/config/make_bench_suite.py       | 168 ++++++++++--------
 scaling/make_bench_suite.py                   |   2 +-
 3 files changed, 97 insertions(+), 75 deletions(-)

diff --git a/scaling/commands/bench_suite_maker.py b/scaling/commands/bench_suite_maker.py
index 9f9d892..890954c 100644
--- a/scaling/commands/bench_suite_maker.py
+++ b/scaling/commands/bench_suite_maker.py
@@ -67,7 +67,7 @@ class BenchSuiteMaker(Command):
         CommandIn(Name='pbs_extra_args', DataType=str,
                   Description='Any extra arguments needed to qsub',
                   DefaultDescription='No extra arguments are used',
-                  Required=False, Default=None)
+                  Required=False, Default="")
     ])

     CommandOuts = ParameterCollection([
         CommandOut(Name='bench_suite', DataType=str,
diff --git a/scaling/interfaces/optparse/config/make_bench_suite.py b/scaling/interfaces/optparse/config/make_bench_suite.py
index ccb535c..6d8aef9 100644
--- a/scaling/interfaces/optparse/config/make_bench_suite.py
+++ b/scaling/interfaces/optparse/config/make_bench_suite.py
@@ -18,7 +18,7 @@
 from pyqi.core.interfaces.optparse.output_handler import write_string
 from scaling.commands.bench_suite_maker import CommandConstructor
 from scaling.interfaces.optparse.input_handler import (get_bench_paths,
-    load_parameters)
+                                                       load_parameters)

 # Convenience function for looking up parameters by name.
 cmd_in_lookup = make_command_in_collection_lookup_f(CommandConstructor)
@@ -28,24 +28,27 @@
 # optparse interface.
 usage_examples = [
     OptparseUsageExample(ShortDesc="Parameters example usage",
-        LongDesc="Test the command \"pick_otus.py\" using different "
-        "similarity values. The file parameters.txt should follow these "
-        "structure\nsimilarityval1,val2,val3",
-        Ex="%prog -c \"pick_otus.py -i seqs.fna\" -p parameters.txt "
-        "-o pick_otus_bench_suite.sh"),
+                         LongDesc="Test the command \"pick_otus.py\" using "
+                         "different similarity values. The file parameters.txt"
+                         " should follow this structure\nsimilarity"
+                         "val1,val2,val3",
+                         Ex="%prog -c \"pick_otus.py -i seqs.fna\" -p "
+                         "parameters.txt -o pick_otus_bench_suite.sh"),
     OptparseUsageExample(ShortDesc="Input files example usage",
-        LongDesc="Test the command \"pick_otus.py\" using different input files"
-        "The folder bench_files should include only the input files used "
-        "by the command",
-        Ex="%prog -c \"pick_otus.py\" -i bench_files -o "
-        "pick_otus_bench_suite.sh"),
+                         LongDesc="Test the command \"pick_otus.py\" using "
+                         "different input files. The folder bench_files should"
+                         " include only the input files used by the command",
+                         Ex="%prog -c \"pick_otus.py\" -i bench_files -o "
+                         "pick_otus_bench_suite.sh"),
     OptparseUsageExample(ShortDesc="Multiple input files example usage",
-        LongDesc="Test the command \"split_librarires_fastq.py\" using "
-        "different input files. These command takes the input file in pairs"
-        " so we provide the input files in two different folders",
-        Ex="%prog -c \"split_librarires_fastq.py -m mapping.txt\" -i "
-        "seqs_folder,barcode_folder --in_opts \"-i,-q\" -o "
-        "split_librarires_fastq_bench_suite.sh")
+                         LongDesc="Test the command \"split_libraries_fastq."
+                         "py\" using different input files. This command "
+                         "takes the input files in pairs, so we provide the "
+                         "input files in two different folders",
+                         Ex="%prog -c \"split_libraries_fastq.py -m "
+                         "mapping.txt\" -i seqs_folder,barcode_folder "
+                         "--in_opts \"-i,-q\" -o "
+                         "split_libraries_fastq_bench_suite.sh")
 ]

 # inputs map command line arguments and values onto Parameters. It is possible
 inputs = [
     OptparseOption(Parameter=cmd_in_lookup('bench_files'),
                    Type='existing_dirpaths',
-        Action='store', # default is 'store', change if desired
-        Handler=get_bench_paths, # must be defined if desired
-        ShortName='i', # must be defined if desired
-        # Name='bench_files', # implied by Parameter
-        # Required=False, # implied by Parameter
-        # Help='List of lists of paths to the benchmark files to use as input for the command. Each inner list is a test case and should have the same length as the in_opts parameter.', # implied by Parameter
-        # Default=None, # implied by Parameter
-        # DefaultDescription='No bench_files used', # implied by Parameter),
+                   Action='store',
+                   Handler=get_bench_paths,
+                   ShortName='i',
+                   # Name='bench_files',
+                   # Required=False,
+                   # Help='List of lists of paths to the benchmark files to use
+                   #      as input for the command. Each inner list is a test
+                   #      case and should have the same length as the in_opts
+                   #      parameter.',
+                   # Default=None,
+                   # DefaultDescription='No bench_files used',
                    ),
     OptparseOption(Parameter=cmd_in_lookup('command'),
                    Type='str',
-        Action='store', # default is 'store', change if desired
-        Handler=None, # must be defined if desired
-        ShortName='c', # must be defined if desired
-        # Name='command', # implied by Parameter
-        # Required=True, # implied by Parameter
-        # Help='command to benchmark', # implied by Parameter
+                   Action='store',
+                   Handler=None,
+                   ShortName='c',
+                   # Name='command',
+                   # Required=True,
+                   # Help='command to benchmark',
                    ),
     OptparseOption(Parameter=cmd_in_lookup('in_opts'),
                    Type='str',
-        Action='store', # default is 'store', change if desired
-        Handler=string_list_handler, # must be defined if desired
-        ShortName=None, # must be defined if desired
-        # Name='in_opts', # implied by Parameter
-        # Required=False, # implied by Parameter
-        # Help='list of options used for providing the benchmark files to the command. It should have the same length and order than the inner lists of bench_files.', # implied by Parameter
-        Default='-i', # implied by Parameter
-        # DefaultDescription='No in_opts used', # implied by Parameter),
+                   Action='store',
+                   Handler=string_list_handler,
+                   ShortName=None,
+                   # Name='in_opts',
+                   # Required=False,
+                   # Help='list of options used for providing the benchmark
+                   #      files to the command. It should have the same length
+                   #      and order as the inner lists of bench_files.'
+ Default='-i', + # DefaultDescription='["-i"] is used as a default' ), OptparseOption(Parameter=cmd_in_lookup('out_opt'), Type='str', - Action='store', # default is 'store', change if desired - Handler=None, # must be defined if desired - ShortName=None, # must be defined if desired + Action='store', + Handler=None, + ShortName=None, # Name='out_opt', # implied by Parameter # Required=False, # implied by Parameter - # Help='Option used for providing the output path to the command to benchmark.', # implied by Parameter - # Default='-o', # implied by Parameter - # DefaultDescription='"-o" is used as default', # implied by Parameter), + # Help='Option used for providing the output path to the + # command to benchmark.', + # Default='-o', + # DefaultDescription='"-o" is used as default' ), OptparseOption(Parameter=cmd_in_lookup('parameters'), Type='existing_filepath', - Action='store', # default is 'store', change if desired - Handler=load_parameters, # must be defined if desired - ShortName='p', # must be defined if desired - # Name='parameters', # implied by Parameter - # Required=False, # implied by Parameter - # Help='dictionary where the keys are the parameters to test and the values are a list of values for such parameter.', # implied by Parameter - # Default=None, # implied by Parameter - # DefaultDescription='No parameters used', # implied by Parameter), + Action='store', + Handler=load_parameters, + ShortName='p', + # Name='parameters', + # Required=False, + # Help='dictionary where the keys are the parameters to test + # and the values are a list of values for such + # parameter.', + # Default=None, + # DefaultDescription='No parameters used', + ), + OptparseOption(Parameter=cmd_in_lookup('pbs'), + Type=None, + Action='store_true', + Handler=None, + ShortName=None, + ), + OptparseOption(Parameter=cmd_in_lookup('job_prefix'), + Type=str, + Action='store', + Handler=None, + ShortName=None, + ), + OptparseOption(Parameter=cmd_in_lookup('queue'), + Type=str, + Action='store', + Handler=None, + ShortName=None, + ), + OptparseOption(Parameter=cmd_in_lookup('pbs_extra_args'), + Type=str, + Action='store', + Handler=None, + ShortName=None, ), OptparseOption(Parameter=None, - Type='new_filepath', - ShortName='o', - Name='output-fp', - Required=True, - Help='the output filepath') + Type='new_filepath', + ShortName='o', + Name='output-fp', + Required=True, + Help='the output filepath') ] # outputs map result keys to output options and handlers. It is not necessary # to supply an associated option, but if you do, it must be an option from the # inputs list (above). outputs = [ - # An example option that maps to a CommandIn. - # OptparseResult(Parameter=cmd_out_lookup('name_of_a_command_out'), - # Handler=write_string, # a function applied to the output of the Command - # # the name of the option (defined in inputs, above), whose - # # value will be made available to Handler. This name - # # can be either an underscored or dashed version of the - # # option name (e.g., 'output_fp' or 'output-fp') - # InputName='output-fp'), - # - # An example option that does not map to a CommandIn. 
- # OptparseResult(Parameter=cmd_out_lookup('some_other_result'), - # Handler=print_string) - OptparseResult(Parameter=cmd_out_lookup('bench_suite'), - Handler=write_string, # must be defined - InputName='output-fp'), # define if tying to an OptparseOption + Handler=write_string, + InputName='output-fp'), ] diff --git a/scaling/make_bench_suite.py b/scaling/make_bench_suite.py index 9a86d86..73f4b7b 100644 --- a/scaling/make_bench_suite.py +++ b/scaling/make_bench_suite.py @@ -68,7 +68,7 @@ # Bash command to collapse the results and generate the scaling plots GET_RESULTS = ("scaling process-bench-results -i $timing_dest/%s -o " - "$dest/plots/%s\n") + "$dest/plots/%s") def get_command_string(command, base_name, opts, values, out_opt): From 8e1c872764392f63c748c798678f2407642efba3 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 13 Feb 2014 17:56:13 -0700 Subject: [PATCH 05/24] Fixing file name typo --- scaling/{cluter_util.py => cluster_util.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scaling/{cluter_util.py => cluster_util.py} (100%) diff --git a/scaling/cluter_util.py b/scaling/cluster_util.py similarity index 100% rename from scaling/cluter_util.py rename to scaling/cluster_util.py From ecc79525571094b519366c8072e87bdabce608dc Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 13 Feb 2014 18:00:54 -0700 Subject: [PATCH 06/24] Fixing tests --- scaling/make_bench_suite.py | 6 +++--- tests/test_make_bench_suite.py | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/scaling/make_bench_suite.py b/scaling/make_bench_suite.py index 73f4b7b..983f8ce 100644 --- a/scaling/make_bench_suite.py +++ b/scaling/make_bench_suite.py @@ -68,7 +68,7 @@ # Bash command to collapse the results and generate the scaling plots GET_RESULTS = ("scaling process-bench-results -i $timing_dest/%s -o " - "$dest/plots/%s") + "$dest/plots/%s %s\n") def get_command_string(command, base_name, opts, values, out_opt): @@ -149,7 +149,7 @@ def make_bench_suite_files(command, in_opts, bench_files, out_opt, pbs=False, result.append(FOR_LOOP % ("\n".join(commands))) # Append to the results string the command to get the results and # generate the benchmark plots - result.append(GET_RESULTS % ("", "")) + result.append(GET_RESULTS % ("", "", "")) return "".join(result) @@ -186,7 +186,7 @@ def make_bench_suite_parameters(command, parameters, out_opt, pbs=False, result.append(MKDIR_OUTPUT_CMD % param) result.append(MKDIR_TIMING_CMD % param) # Loop through all the possible values of the current parameter - get_results_list.append(GET_RESULTS % (param, param)) + get_results_list.append(GET_RESULTS % (param, param, "")) for val in parameters[param]: # Create a directory for storing the output commands # and timing results for current parameter value diff --git a/tests/test_make_bench_suite.py b/tests/test_make_bench_suite.py index 341f560..e636ee9 100644 --- a/tests/test_make_bench_suite.py +++ b/tests/test_make_bench_suite.py @@ -136,7 +136,7 @@ def test_make_bench_suite_parameters_multiple(self): done # Get the benchmark results and produce the plots -scaling process-bench-results -i $timing_dest/ -o $dest/plots/ +scaling process-bench-results -i $timing_dest/ -o $dest/plots/ """ exp_bench_suite_files_multiple = """#!/bin/bash @@ -181,7 +181,7 @@ def test_make_bench_suite_parameters_multiple(self): done # Get the benchmark results and produce the plots -scaling process-bench-results -i $timing_dest/ -o $dest/plots/ +scaling process-bench-results -i $timing_dest/ -o $dest/plots/ """ 
exp_bench_suite_parameters_single = """#!/bin/bash @@ -229,7 +229,7 @@ def test_make_bench_suite_parameters_multiple(self): # Get the benchmark results and produce the plots mkdir $dest/plots -scaling process-bench-results -i $timing_dest/jobs_to_start -o $dest/plots/jobs_to_start +scaling process-bench-results -i $timing_dest/jobs_to_start -o $dest/plots/jobs_to_start """ exp_bench_suite_parameters_multiple = """#!/bin/bash @@ -288,8 +288,8 @@ def test_make_bench_suite_parameters_multiple(self): # Get the benchmark results and produce the plots mkdir $dest/plots -scaling process-bench-results -i $timing_dest/jobs_to_start -o $dest/plots/jobs_to_start -scaling process-bench-results -i $timing_dest/similarity -o $dest/plots/similarity +scaling process-bench-results -i $timing_dest/jobs_to_start -o $dest/plots/jobs_to_start +scaling process-bench-results -i $timing_dest/similarity -o $dest/plots/similarity """ if __name__ == '__main__': From 115ce33b1f292fb6e88e86b21985e81b4334a52e Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Thu, 13 Feb 2014 21:17:32 -0700 Subject: [PATCH 07/24] Making the prefix of each command unique --- scaling/make_bench_suite.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scaling/make_bench_suite.py b/scaling/make_bench_suite.py index 983f8ce..bd5b7ba 100644 --- a/scaling/make_bench_suite.py +++ b/scaling/make_bench_suite.py @@ -52,7 +52,7 @@ # The PBS template follows this structure # echo "cd $PWD; " | qsub -k oe -N -q -PBS_TEMPLATE = (" echo \"cd $PWD; %s\" | qsub -k oe -N %s -q %s %s") +PBS_TEMPLATE = (" echo \"cd $PWD; %s\" | qsub -k oe -N %s%d -q %s %s") # The bash loop used to execute the commands as many times as # provided by the user @@ -142,8 +142,8 @@ def make_bench_suite_files(command, in_opts, bench_files, out_opt, pbs=False, if pbs: # We are creating a benchmark suite in a cluster environment # Add the qsub command for each job - commands = [PBS_TEMPLATE % (cmd, job_prefix, queue, pbs_extra_args) - for cmd in commands] + commands = [PBS_TEMPLATE % (cmd, job_prefix, i, queue, pbs_extra_args) + for i, cmd in enumerate(commands)] # Insert the command in the bash for loop and # append these lines to the result string result.append(FOR_LOOP % ("\n".join(commands))) From 4de21c2867d413d5f127824891638faff68c2193 Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Fri, 14 Feb 2014 11:21:46 -0700 Subject: [PATCH 08/24] Add job id tracking on the shell script --- scaling/make_bench_suite.py | 23 +++++++++++++++------- tests/test_make_bench_suite.py | 36 +++++++++++++++++++--------------- 2 files changed, 36 insertions(+), 23 deletions(-) diff --git a/scaling/make_bench_suite.py b/scaling/make_bench_suite.py index bd5b7ba..d7db989 100644 --- a/scaling/make_bench_suite.py +++ b/scaling/make_bench_suite.py @@ -50,9 +50,11 @@ COMMAND_TEMPLATE = (" timing_wrapper.sh $timing_dest/%s/$i.txt %s %s %s " "$output_dest/%s/$i") -# The PBS template follows this structure -# echo "cd $PWD; " | qsub -k oe -N -q -PBS_TEMPLATE = (" echo \"cd $PWD; %s\" | qsub -k oe -N %s%d -q %s %s") +# The PBS template follows this structure - blah=${blah#?} +# +=";"`echo "cd $PWD; " | qsub -k oe -N +# -q ` +PBS_CMD_TEMPLATE = (" %s+=\";\"`echo \"cd $PWD; %s\" | qsub -k oe" + " -N %s%d -q %s %s`") # The bash loop used to execute the commands as many times as # provided by the user @@ -141,12 +143,17 @@ def make_bench_suite_files(command, in_opts, bench_files, out_opt, pbs=False, bfs, out_opt)) if pbs: # We are creating a benchmark suite in a cluster environment + # 
Clean up the scaling_jobs variable + result.append("scaling_jobs=\"\"\n") # Add the qsub command for each job - commands = [PBS_TEMPLATE % (cmd, job_prefix, i, queue, pbs_extra_args) - for i, cmd in enumerate(commands)] + commands = [PBS_CMD_TEMPLATE % ("scaling_jobs", cmd, job_prefix, i, + queue, pbs_extra_args) for i, cmd in enumerate(commands)] # Insert the command in the bash for loop and # append these lines to the result string result.append(FOR_LOOP % ("\n".join(commands))) + if pbs: + # We need to remove the first ";" character of scaling_jobs + result.append("scaling_jobs=${scaling_jobs#?}\n") # Append to the results string the command to get the results and # generate the benchmark plots result.append(GET_RESULTS % ("", "", "")) @@ -199,9 +206,11 @@ def make_bench_suite_parameters(command, parameters, out_opt, pbs=False, [val], out_opt)) if pbs: # We are creating a benchmark suite in a cluster environment + # Clean up the scaling_jobs variable + result.append("scaling_jobs=\"\"") # Add the qsub command for each job - commands = [PBS_TEMPLATE % (cmd, job_prefix, queue, pbs_extra_args) - for cmd in commands] + commands = [PBS_CMD_TEMPLATE % ("scaling_jobs", cmd, job_prefix, i, + queue, pbs_extra_args) for i, cmd in enumerate(commands)] # Insert the commands in the bash for loop and # append these lines to the result string result.append(FOR_LOOP % ("\n".join(commands))) diff --git a/tests/test_make_bench_suite.py b/tests/test_make_bench_suite.py index e636ee9..031c28c 100644 --- a/tests/test_make_bench_suite.py +++ b/tests/test_make_bench_suite.py @@ -11,11 +11,13 @@ from unittest import TestCase, main from scaling.make_bench_suite import (get_command_string, - make_bench_suite_files, make_bench_suite_parameters) + make_bench_suite_files, + make_bench_suite_parameters) + class TestGetCommandString(TestCase): """Tests the get_command_string function""" - + def test_get_command_string_single(self): """Correctly generates a command with a single input option""" cmd = "pick_otus.py" @@ -24,8 +26,8 @@ def test_get_command_string_single(self): values = ['1000000.fna'] out_opt = "-o" obs = get_command_string(cmd, base_name, opts, values, out_opt) - exp = (" timing_wrapper.sh $timing_dest/1000000/$i.txt pick_otus.py " - "-i 1000000.fna -o $output_dest/1000000/$i") + exp = (" timing_wrapper.sh $timing_dest/1000000/$i.txt pick_otus.py" + " -i 1000000.fna -o $output_dest/1000000/$i") self.assertEqual(obs, exp) def test_get_command_string_multiple(self): @@ -33,12 +35,12 @@ def test_get_command_string_multiple(self): cmd = "split_libraries_fastq.py -m mapping.txt" base_name = "1000000" opts = ['-i', '-b'] - values = ['reads/1000000.fna','barcodes/1000000.fna'] + values = ['reads/1000000.fna', 'barcodes/1000000.fna'] out_opt = "-o" obs = get_command_string(cmd, base_name, opts, values, out_opt) exp = (" timing_wrapper.sh $timing_dest/1000000/$i.txt " - "split_libraries_fastq.py -m mapping.txt -i reads/1000000.fna -b " - "barcodes/1000000.fna -o $output_dest/1000000/$i") + "split_libraries_fastq.py -m mapping.txt -i reads/1000000.fna " + "-b barcodes/1000000.fna -o $output_dest/1000000/$i") self.assertEqual(obs, exp) def test_get_command_string_error(self): @@ -49,7 +51,8 @@ def test_get_command_string_error(self): values = ['1000000.fna'] out_opt = "-o" self.assertRaises(ValueError, get_command_string, cmd, base_name, opts, - values, out_opt) + values, out_opt) + class TestMakeBenchSuiteFiles(TestCase): """Tests the make_bench_suite_files function""" @@ -64,23 +67,24 @@ def 
test_make_bench_suite_files_single(self): self.assertEqual(obs, exp_bench_suite_files_single) def test_make_bench_suite_files_multiple(self): - """Correctly generates the benchmark suite for multiple input options""" + """Correctly generates the bench suite for multiple input options""" cmd = "split_libraries_fastq.py -m mapping.txt" in_opts = ["-i", "-b"] - bench_files = [["reads/1000000.fna","barcodes/1000000.fna"], - ["reads/2000000.fna","barcodes/2000000.fna"], - ["reads/3000000.fna","barcodes/3000000.fna"]] + bench_files = [["reads/1000000.fna", "barcodes/1000000.fna"], + ["reads/2000000.fna", "barcodes/2000000.fna"], + ["reads/3000000.fna", "barcodes/3000000.fna"]] out_opt = "-o" obs = make_bench_suite_files(cmd, in_opts, bench_files, out_opt) self.assertEqual(obs, exp_bench_suite_files_multiple) + class TestMakeBenchSuiteParameters(TestCase): """Tests the make_bench_suite_parameters function""" def test_make_bench_suite_parameters_single(self): """Correctly generates the benchmark suite for a single parameter""" cmd = "parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna" - params = {"jobs_to_start" : ["8", "16", "32"]} + params = {"jobs_to_start": ["8", "16", "32"]} out_opt = "-o" obs = make_bench_suite_parameters(cmd, params, out_opt) self.assertEqual(obs, exp_bench_suite_parameters_single) @@ -88,8 +92,8 @@ def test_make_bench_suite_parameters_single(self): def test_make_bench_suite_parameters_multiple(self): """Correctly generates the benchmark suite for multiple parameters""" cmd = "parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna" - params = {"jobs_to_start" : ["8", "16", "32"], - "similarity" : ["0.94", "0.97", "0.99"]} + params = {"jobs_to_start": ["8", "16", "32"], + "similarity": ["0.94", "0.97", "0.99"]} out_opt = "-o" obs = make_bench_suite_parameters(cmd, params, out_opt) self.assertEqual(obs, exp_bench_suite_parameters_multiple) @@ -293,4 +297,4 @@ def test_make_bench_suite_parameters_multiple(self): """ if __name__ == '__main__': - main() \ No newline at end of file + main() From 70ab5a901ee05f9dddad44e3de869dff3a9994ee Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Fri, 14 Feb 2014 19:10:27 -0700 Subject: [PATCH 09/24] Support multiple bash variables for keeping track of job_id and create test for the new functionality --- scaling/make_bench_suite.py | 52 ++++++++---- tests/test_make_bench_suite.py | 139 +++++++++++++++++++++++++++++++++ 2 files changed, 177 insertions(+), 14 deletions(-) diff --git a/scaling/make_bench_suite.py b/scaling/make_bench_suite.py index d7db989..d00f167 100644 --- a/scaling/make_bench_suite.py +++ b/scaling/make_bench_suite.py @@ -154,9 +154,13 @@ def make_bench_suite_files(command, in_opts, bench_files, out_opt, pbs=False, if pbs: # We need to remove the first ";" character of scaling_jobs result.append("scaling_jobs=${scaling_jobs#?}\n") - # Append to the results string the command to get the results and - # generate the benchmark plots - result.append(GET_RESULTS % ("", "", "")) + # Append to the results string the command to get the results and + # generate the benchmark plots + result.append(GET_RESULTS % ("", "", "-w $scaling_jobs")) + else: + # Append to the results string the command to get the results and + # generate the benchmark plots + result.append(GET_RESULTS % ("", "", "")) return "".join(result) @@ -184,16 +188,19 @@ def make_bench_suite_parameters(command, parameters, out_opt, pbs=False, # Get the base name of the command base_cmd = command.split(" ")[0].split(".")[0] result = [BASH_HEADER % 
base_cmd]
-    # Iterate over the benchmark parameters
+    # Iterate over the parameters to benchmark
     commands = []
     get_results_list = []
+    # These two variables are used in case of a pbs env
+    count = 0
+    var_jobs = []
     for param in parameters:
         # Add the commands to create the directories to store the
         # results of the benchmark suite
         result.append(MKDIR_OUTPUT_CMD % param)
         result.append(MKDIR_TIMING_CMD % param)
         # Loop through all the possible values of the current parameter
-        get_results_list.append(GET_RESULTS % (param, param, ""))
+        param_cmds = []
         for val in parameters[param]:
             # Create a directory for storing the output commands
             # and timing results for current parameter value
@@ -202,20 +209,37 @@ def make_bench_suite_parameters(command, parameters, out_opt, pbs=False,
             result.append(MKDIR_TIMING_CMD % param_dir)
             # Get the string of the command to be executed
             param_str = "--" + param
-            commands.append(get_command_string(command, param_dir, [param_str],
-                [val], out_opt))
-    if pbs:
-        # We are creating a benchmark suite in a cluster environment
-        # Clean up the scaling_jobs variable
-        result.append("scaling_jobs=\"\"")
-        # Add the qsub command for each job
-        commands = [PBS_CMD_TEMPLATE % ("scaling_jobs", cmd, job_prefix, i,
-            queue, pbs_extra_args) for i, cmd in enumerate(commands)]
+            param_cmds.append(get_command_string(command, param_dir,
+                                                 [param_str], [val], out_opt))
+        # Check if we are creating the command for a cluster environment
+        if pbs:
+            var_job = "%s_jobs" % param
+            var_jobs.append(var_job)
+            param_cmds = [PBS_CMD_TEMPLATE % (var_job, cmd, job_prefix,
+                          count + i, queue, pbs_extra_args) for i, cmd in
+                          enumerate(param_cmds)]
+            count += len(param_cmds)
+            # Create the process results command
+            get_results_list.append(GET_RESULTS % (param, param,
+                                                   "-w $%s" % var_job))
+        else:
+            # Create the process results command
+            get_results_list.append(GET_RESULTS % (param, param, ""))
+        # Extend the commands list with the param commands
+        commands.extend(param_cmds)
+    # Initialize the bash variables
+    # Note that if we are not in a PBS environment, var_jobs is empty
+    for var_job in var_jobs:
+        result.append("%s=\"\"\n" % var_job)
     # Insert the commands in the bash for loop and
     # append these lines to the result string
     result.append(FOR_LOOP % ("\n".join(commands)))
     # Append the result string for each parameter to get the
     # results and generate the benchmark plots
     result.append("mkdir $dest/plots\n")
+    # Remove the first ";" character of the bash variables
+    # Note that if we are not in a PBS environment, var_jobs is empty
+    for var_job in var_jobs:
+        result.append("%s=${%s#?}\n" % (var_job, var_job))
     result.extend(get_results_list)
     return "".join(result)
diff --git a/tests/test_make_bench_suite.py b/tests/test_make_bench_suite.py
index 031c28c..8383926 100644
--- a/tests/test_make_bench_suite.py
+++ b/tests/test_make_bench_suite.py
@@ -77,6 +77,20 @@ def test_make_bench_suite_files_multiple(self):
         obs = make_bench_suite_files(cmd, in_opts, bench_files, out_opt)
         self.assertEqual(obs, exp_bench_suite_files_multiple)

+    def test_make_bench_suite_files_pbs(self):
+        """Correctly generates the bench suite for a pbs environment"""
+        cmd = "pick_otus.py"
+        in_opts = ["-i"]
+        bench_files = [["1000000.fna"], ["2000000.fna"], ["3000000.fna"]]
+        out_opt = "-o"
+        pbs = True
+        job_prefix = "test"
+        queue = "friendlyq"
+        pbs_extra_args = "-m abe"
+        obs = make_bench_suite_files(cmd, in_opts, bench_files, out_opt, pbs,
+                                     job_prefix, queue, pbs_extra_args)
+        self.assertEqual(obs, exp_bench_suite_files_pbs)
+

 class 
TestMakeBenchSuiteParameters(TestCase):
     """Tests the make_bench_suite_parameters function"""
@@ -98,6 +112,20 @@ def test_make_bench_suite_parameters_multiple(self):
         obs = make_bench_suite_parameters(cmd, params, out_opt)
         self.assertEqual(obs, exp_bench_suite_parameters_multiple)

+    def test_make_bench_suite_parameters_pbs(self):
+        """Correctly generates the benchmark suite for a pbs environment"""
+        cmd = "parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna"
+        params = {"jobs_to_start": ["8", "16", "32"],
+                  "similarity": ["0.94", "0.97", "0.99"]}
+        out_opt = "-o"
+        pbs = True
+        job_prefix = "test"
+        queue = "friendlyq"
+        pbs_extra_args = "-m abe"
+        obs = make_bench_suite_parameters(cmd, params, out_opt, pbs,
+                                          job_prefix, queue, pbs_extra_args)
+        self.assertEqual(obs, exp_bench_suite_parameters_pbs)
+

 exp_bench_suite_files_single = """#!/bin/bash
@@ -216,6 +216,53 @@ def test_make_bench_suite_parameters_multiple(self):
 scaling process-bench-results -i $timing_dest/ -o $dest/plots/ 
 """

+exp_bench_suite_files_pbs = """#!/bin/bash
+
+# Number of times each command should be executed
+num_rep=1
+
+# Check if the user supplied a (valid) number of repetitions
+if [[ $# -eq 1 ]]; then
+    if [[ $1 =~ ^[0-9]+$ ]]; then
+        num_rep=$1
+    else
+        echo "USAGE: $0 [num_reps]"
+    fi
+fi
+
+# Get a string with current date (format YYYYMMDD_HHMMSS) to name
+# the directory with the benchmark results
+cdate=`date +_%Y%m%d_%H%M%S`
+dest=$PWD/pick_otus$cdate
+mkdir $dest
+
+# Create output directory structure
+output_dest=$dest"/command_outputs"
+timing_dest=$dest"/timing"
+
+mkdir $output_dest
+mkdir $timing_dest
+mkdir $output_dest/1000000
+mkdir $timing_dest/1000000
+mkdir $output_dest/2000000
+mkdir $timing_dest/2000000
+mkdir $output_dest/3000000
+mkdir $timing_dest/3000000
+scaling_jobs=""
+# Loop as many times as desired
+for i in `seq $num_rep`
+do
+    # benchmarking commands:
+    scaling_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/1000000/$i.txt pick_otus.py -i 1000000.fna -o $output_dest/1000000/$i" | qsub -k oe -N test0 -q friendlyq -m abe`
+    scaling_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/2000000/$i.txt pick_otus.py -i 2000000.fna -o $output_dest/2000000/$i" | qsub -k oe -N test1 -q friendlyq -m abe`
+    scaling_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/3000000/$i.txt pick_otus.py -i 3000000.fna -o $output_dest/3000000/$i" | qsub -k oe -N test2 -q friendlyq -m abe`
+done
+
+# Get the benchmark results and produce the plots
+scaling_jobs=${scaling_jobs#?}
+scaling process-bench-results -i $timing_dest/ -o $dest/plots/ -w $scaling_jobs
+"""
+
 exp_bench_suite_parameters_single = """#!/bin/bash
@@ -296,5 +371,69 @@ def test_make_bench_suite_parameters_multiple(self):
 scaling process-bench-results -i $timing_dest/similarity -o $dest/plots/similarity
 """

+exp_bench_suite_parameters_pbs = """#!/bin/bash
+
+# Number of times each command should be executed
+num_rep=1
+
+# Check if the user supplied a (valid) number of repetitions
+if [[ $# -eq 1 ]]; then
+    if [[ $1 =~ ^[0-9]+$ ]]; then
+        num_rep=$1
+    else
+        echo "USAGE: $0 [num_reps]"
+    fi
+fi
+
+# Get a string with current date (format YYYYMMDD_HHMMSS) to name
+# the directory with the benchmark results
+cdate=`date +_%Y%m%d_%H%M%S`
+dest=$PWD/parallel_pick_otus_uclust_ref$cdate
+mkdir $dest
+
+# Create output directory structure
+output_dest=$dest"/command_outputs"
+timing_dest=$dest"/timing"
+
+mkdir $output_dest
+mkdir $timing_dest
+mkdir $output_dest/jobs_to_start
+mkdir $timing_dest/jobs_to_start
+mkdir $output_dest/jobs_to_start/8
+mkdir $timing_dest/jobs_to_start/8
+mkdir $output_dest/jobs_to_start/16
+mkdir $timing_dest/jobs_to_start/16
+mkdir $output_dest/jobs_to_start/32
+mkdir $timing_dest/jobs_to_start/32
+mkdir $output_dest/similarity
+mkdir $timing_dest/similarity
+mkdir $output_dest/similarity/0.94
+mkdir $timing_dest/similarity/0.94
+mkdir $output_dest/similarity/0.97
+mkdir $timing_dest/similarity/0.97
+mkdir $output_dest/similarity/0.99
+mkdir $timing_dest/similarity/0.99
+jobs_to_start_jobs=""
+similarity_jobs=""
+# Loop as many times as desired
+for i in `seq $num_rep`
+do
+    # benchmarking commands:
+    jobs_to_start_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/jobs_to_start/8/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --jobs_to_start 8 -o $output_dest/jobs_to_start/8/$i" | qsub -k oe -N test0 -q friendlyq -m abe`
+    jobs_to_start_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/jobs_to_start/16/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --jobs_to_start 16 -o $output_dest/jobs_to_start/16/$i" | qsub -k oe -N test1 -q friendlyq -m abe`
+    jobs_to_start_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/jobs_to_start/32/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --jobs_to_start 32 -o $output_dest/jobs_to_start/32/$i" | qsub -k oe -N test2 -q friendlyq -m abe`
+    similarity_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/similarity/0.94/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --similarity 0.94 -o $output_dest/similarity/0.94/$i" | qsub -k oe -N test3 -q friendlyq -m abe`
+    similarity_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/similarity/0.97/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --similarity 0.97 -o $output_dest/similarity/0.97/$i" | qsub -k oe -N test4 -q friendlyq -m abe`
+    similarity_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/similarity/0.99/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --similarity 0.99 -o $output_dest/similarity/0.99/$i" | qsub -k oe -N test5 -q friendlyq -m abe`
+done
+
+# Get the benchmark results and produce the plots
+mkdir $dest/plots
+jobs_to_start_jobs=${jobs_to_start_jobs#?}
+similarity_jobs=${similarity_jobs#?}
+scaling process-bench-results -i $timing_dest/jobs_to_start -o $dest/plots/jobs_to_start -w $jobs_to_start_jobs
+scaling process-bench-results -i $timing_dest/similarity -o $dest/plots/similarity -w $similarity_jobs
+"""
+
 if __name__ == '__main__':
     main()

From 8babbda171672ad08e03723962a3223697abf415 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Fri, 14 Feb 2014 19:20:58 -0700
Subject: [PATCH 10/24] Modifying BenchResultsProcesser command to accept a
 list of job ids to wait for before processing the results

---
 scaling/commands/bench_results_processer.py | 31 ++++++++++++---------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/scaling/commands/bench_results_processer.py b/scaling/commands/bench_results_processer.py
index 89fa0d0..86e2a89 100644
--- a/scaling/commands/bench_results_processer.py
+++ b/scaling/commands/bench_results_processer.py
@@ -10,44 +10,49 @@
 __email__ = "josenavasmolina@gmail.com"
 __status__ = "Development"

-from pyqi.core.command import (Command, CommandIn, CommandOut,
-    ParameterCollection)
+from pyqi.core.command import (Command, CommandIn, CommandOut,
+                               ParameterCollection)
 from 
scaling.process_results import process_benchmark_results from matplotlib.figure import Figure + class BenchResultsProcesser(Command): BriefDescription = "Processes the benchmark suite results" - LongDescription = "Takes the benchmark suite output directory and " +\ - "processes the benchmark measurements, creating plots and collapsing" +\ - " results in a usable form." + LongDescription = ("Takes the benchmark suite output directory and " + "processes the benchmark measurements, creating plots " + "and collapsing results in a usable form.") CommandIns = ParameterCollection([ CommandIn(Name='input_dir', DataType=str, Description='Path to the directory with the time results', Required=True), + CommandIn(Name='job_ids', DataType=list, + Description='List of job ids to wait for if running in a ' + 'pbs cluster', Required=False, Default=[]) ]) CommandOuts = ParameterCollection([ CommandOut(Name="bench_data", DataType=dict, - Description="Dictionary with the benchmark results"), + Description="Dictionary with the benchmark results"), CommandOut(Name="time_fig", DataType=Figure, - Description="Figure with the execution time results"), + Description="Figure with the execution time results"), CommandOut(Name="time_str", DataType=str, - Description="String with the best polynomial fit to the benchmark " - "execution time results"), + Description="String with the best polynomial fit to the " + "benchmark execution time results"), CommandOut(Name="mem_fig", DataType=Figure, - Description="Figure with the memory consumption results"), + Description="Figure with the memory consumption results"), CommandOut(Name="mem_str", DataType=str, - Description="String with the best polynomial fit to the benchmark " - "memory consumption results") + Description="String with the best polynomial fit to the " + "benchmark memory consumption results") ]) def run(self, **kwargs): result = {} input_dir = kwargs['input_dir'] + job_ids = kwargs['job_ids'] data, time_fig, time_str, mem_fig, mem_str = \ - process_benchmark_results(input_dir) + process_benchmark_results(input_dir, job_ids) result['bench_data'] = data result['time_fig'] = time_fig From 4aaa353f2c32cb0acd44c88896237d0864f5dace Mon Sep 17 00:00:00 2001 From: Jose Navas Date: Fri, 14 Feb 2014 19:30:52 -0700 Subject: [PATCH 11/24] Modifying interface to accept the new parameter --- .../optparse/config/process_bench_results.py | 47 +++++++++++-------- 1 file changed, 28 insertions(+), 19 deletions(-) diff --git a/scaling/interfaces/optparse/config/process_bench_results.py b/scaling/interfaces/optparse/config/process_bench_results.py index 5714674..9caa0f2 100644 --- a/scaling/interfaces/optparse/config/process_bench_results.py +++ b/scaling/interfaces/optparse/config/process_bench_results.py @@ -14,10 +14,10 @@ OptparseOption, OptparseResult) from pyqi.core.command import (make_command_in_collection_lookup_f, make_command_out_collection_lookup_f) +from pyqi.core.interfaces.optparse.input_handler import string_list_handler from scaling.commands.bench_results_processer import CommandConstructor -from scaling.interfaces.optparse.output_handler import (write_summarized_results, - write_matplotlib_figure, - write_string_to_dir) +from scaling.interfaces.optparse.output_handler import \ + (write_summarized_results, write_matplotlib_figure, write_string_to_dir) # Convenience function for looking up parameters by name. 
cmd_in_lookup = make_command_in_collection_lookup_f(CommandConstructor)
@@ -36,13 +36,22 @@
 inputs = [
     OptparseOption(Parameter=cmd_in_lookup('input_dir'),
                    Type='existing_dirpath',
-        Action='store', # default is 'store', change if desired
-        Handler=None, # must be defined if desired
-        ShortName='i', # must be defined if desired
-        # Name='input_dir', # implied by Parameter
-        # Required=True, # implied by Parameter
-        # Help='Path to the directory with the time results', # implied by Parameter
+                   Action='store',
+                   Handler=None,
+                   ShortName='i',
+                   # Name='input_dir',
+                   # Required=True,
+                   # Help='Path to the directory with the time results',
                    ),
+    OptparseOption(Parameter=cmd_in_lookup('job_ids'),
+                   Type='str',
+                   Action='store',
+                   Handler=string_list_handler,
+                   ShortName='w',
+                   Name='wait_on',
+                   Required=False,
+                   Help='Comma-separated list of job ids to wait for before '
+                        'processing the results'),
     OptparseOption(Parameter=None,
                    Type='new_dirpath',
                    ShortName='o',
@@ -56,18 +65,18 @@
 # inputs list (above).
 outputs = [
     OptparseResult(Parameter=cmd_out_lookup('bench_data'),
-        Handler=write_summarized_results,
-        InputName='output-dir'),
+                   Handler=write_summarized_results,
+                   InputName='output-dir'),
     OptparseResult(Parameter=cmd_out_lookup('mem_fig'),
-        Handler=write_matplotlib_figure,
-        InputName='output-dir'),
+                   Handler=write_matplotlib_figure,
+                   InputName='output-dir'),
     OptparseResult(Parameter=cmd_out_lookup('mem_str'),
-        Handler=write_string_to_dir,
-        InputName='output-dir'),
+                   Handler=write_string_to_dir,
+                   InputName='output-dir'),
     OptparseResult(Parameter=cmd_out_lookup('time_fig'),
-        Handler=write_matplotlib_figure,
-        InputName='output-dir'),
+                   Handler=write_matplotlib_figure,
+                   InputName='output-dir'),
     OptparseResult(Parameter=cmd_out_lookup('time_str'),
-        Handler=write_string_to_dir,
-        InputName='output-dir'),
+                   Handler=write_string_to_dir,
+                   InputName='output-dir'),
 ]

From d43128f29b7b1facebf68b91e31b99c665360a05 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Fri, 14 Feb 2014 19:32:53 -0700
Subject: [PATCH 12/24] Fixing typo

---
 scaling/make_bench_suite.py    |  2 +-
 tests/test_make_bench_suite.py | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/scaling/make_bench_suite.py b/scaling/make_bench_suite.py
index d00f167..7110046 100644
--- a/scaling/make_bench_suite.py
+++ b/scaling/make_bench_suite.py
@@ -53,7 +53,7 @@
 # The PBS template follows this structure - blah=${blah#?}
 # +=";"`echo "cd $PWD; " | qsub -k oe -N
 # -q `
-PBS_CMD_TEMPLATE = (" %s+=\";\"`echo \"cd $PWD; %s\" | qsub -k oe"
+PBS_CMD_TEMPLATE = (" %s+=\",\"`echo \"cd $PWD; %s\" | qsub -k oe"
                     " -N %s%d -q %s %s`")
diff --git a/tests/test_make_bench_suite.py b/tests/test_make_bench_suite.py
index 8383926..ad29df4 100644
--- a/tests/test_make_bench_suite.py
+++ b/tests/test_make_bench_suite.py
@@ -253,9 +253,9 @@ def test_make_bench_suite_parameters_pbs(self):
 for i in `seq $num_rep`
 do
     # benchmarking commands:
-    scaling_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/1000000/$i.txt pick_otus.py -i 1000000.fna -o $output_dest/1000000/$i" | qsub -k oe -N test0 -q friendlyq -m abe`
-    scaling_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/2000000/$i.txt pick_otus.py -i 2000000.fna -o $output_dest/2000000/$i" | qsub -k oe -N test1 -q friendlyq -m abe`
-    scaling_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/3000000/$i.txt pick_otus.py -i 3000000.fna -o $output_dest/3000000/$i" | qsub -k oe -N test2 -q friendlyq -m 
abe` + scaling_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/1000000/$i.txt pick_otus.py -i 1000000.fna -o $output_dest/1000000/$i" | qsub -k oe -N test0 -q friendlyq -m abe` + scaling_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/2000000/$i.txt pick_otus.py -i 2000000.fna -o $output_dest/2000000/$i" | qsub -k oe -N test1 -q friendlyq -m abe` + scaling_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/3000000/$i.txt pick_otus.py -i 3000000.fna -o $output_dest/3000000/$i" | qsub -k oe -N test2 -q friendlyq -m abe` done # Get the benchmark results and produce the plots @@ -419,12 +419,12 @@ def test_make_bench_suite_parameters_pbs(self): for i in `seq $num_rep` do # benchmarking commands: - jobs_to_start_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/jobs_to_start/8/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --jobs_to_start 8 -o $output_dest/jobs_to_start/8/$i" | qsub -k oe -N test0 -q friendlyq -m abe` - jobs_to_start_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/jobs_to_start/16/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --jobs_to_start 16 -o $output_dest/jobs_to_start/16/$i" | qsub -k oe -N test1 -q friendlyq -m abe` - jobs_to_start_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/jobs_to_start/32/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --jobs_to_start 32 -o $output_dest/jobs_to_start/32/$i" | qsub -k oe -N test2 -q friendlyq -m abe` - similarity_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/similarity/0.94/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --similarity 0.94 -o $output_dest/similarity/0.94/$i" | qsub -k oe -N test3 -q friendlyq -m abe` - similarity_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/similarity/0.97/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --similarity 0.97 -o $output_dest/similarity/0.97/$i" | qsub -k oe -N test4 -q friendlyq -m abe` - similarity_jobs+=";"`echo "cd $PWD; timing_wrapper.sh $timing_dest/similarity/0.99/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --similarity 0.99 -o $output_dest/similarity/0.99/$i" | qsub -k oe -N test5 -q friendlyq -m abe` + jobs_to_start_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/jobs_to_start/8/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --jobs_to_start 8 -o $output_dest/jobs_to_start/8/$i" | qsub -k oe -N test0 -q friendlyq -m abe` + jobs_to_start_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/jobs_to_start/16/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --jobs_to_start 16 -o $output_dest/jobs_to_start/16/$i" | qsub -k oe -N test1 -q friendlyq -m abe` + jobs_to_start_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/jobs_to_start/32/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --jobs_to_start 32 -o $output_dest/jobs_to_start/32/$i" | qsub -k oe -N test2 -q friendlyq -m abe` + similarity_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/similarity/0.94/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --similarity 0.94 -o $output_dest/similarity/0.94/$i" | qsub -k oe -N test3 -q friendlyq -m abe` + similarity_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/similarity/0.97/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --similarity 0.97 -o $output_dest/similarity/0.97/$i" | qsub -k oe -N test4 -q friendlyq -m abe` + similarity_jobs+=","`echo "cd $PWD; 
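The separator change above matters because the generated suite strips the first character with scaling_jobs=${scaling_jobs#?} and passes the accumulated ids to scaling process-bench-results -w, whose wait_on option is documented as a comma-separated list, so the ids must be joined with "," rather than ";". A minimal Python sketch of how the corrected template expands; the template string is copied from the diff above, while the command and job values are invented for illustration:

# Template copied from the patch; the arguments below are made-up examples.
PBS_CMD_TEMPLATE = ("    %s+=\",\"`echo \"cd $PWD; %s\" | qsub -k oe"
                    " -N %s%d -q %s %s`")

line = PBS_CMD_TEMPLATE % ("scaling_jobs",
                           "timing_wrapper.sh t.txt pick_otus.py -i in.fna -o out",
                           "test", 0, "friendlyq", "-m abe")
# Produces a bash line ending in: | qsub -k oe -N test0 -q friendlyq -m abe`
print(line)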
From 8e0a680375031885c06855f2f074f8d483f88691 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Fri, 14 Feb 2014 20:33:39 -0700
Subject: [PATCH 13/24] Adding wait_on functionality

---
 scaling/commands/bench_results_processer.py |  7 +++++--
 scaling/process_results.py                  | 23 ++++++++++++++-------
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/scaling/commands/bench_results_processer.py b/scaling/commands/bench_results_processer.py
index 86e2a89..e93456a 100644
--- a/scaling/commands/bench_results_processer.py
+++ b/scaling/commands/bench_results_processer.py
@@ -12,8 +12,9 @@
 
 from pyqi.core.command import (Command, CommandIn, CommandOut,
                                ParameterCollection)
-from scaling.process_results import process_benchmark_results
 from matplotlib.figure import Figure
+from scaling.process_results import process_benchmark_results
+from scaling.cluster_util import wait_on
 
 
 class BenchResultsProcesser(Command):
@@ -51,8 +52,10 @@ def run(self, **kwargs):
         input_dir = kwargs['input_dir']
         job_ids = kwargs['job_ids']
 
+        wait_on(job_ids)
+
         data, time_fig, time_str, mem_fig, mem_str = \
-            process_benchmark_results(input_dir, job_ids)
+            process_benchmark_results(input_dir)
 
         result['bench_data'] = data
         result['time_fig'] = time_fig
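With this change the command blocks on the submitted PBS jobs before it touches the timing directory, instead of threading job ids through the results processing. A rough sketch of the polling contract wait_on is expected to satisfy; the real implementation lives in scaling/cluster_util.py and is refined in PATCH 19-21 below, so check_status here is only a stand-in parameter:

from time import sleep

def wait_on_sketch(jobs_to_monitor, check_status, poll_interval=5):
    # check_status returns the subset of ids still queued or running;
    # keep polling until that subset is empty.
    while jobs_to_monitor:
        sleep(poll_interval)
        jobs_to_monitor = check_status(jobs_to_monitor)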
diff --git a/scaling/process_results.py b/scaling/process_results.py
index 90f6a3b..35be2b2 100644
--- a/scaling/process_results.py
+++ b/scaling/process_results.py
@@ -107,6 +107,7 @@ def process_timing_directory(timing_dir):
     # Return the output dictionary
     return data
 
+
 def compute_rsquare(y, SSerr):
     """Computes the Rsquare value using the points y and the Sum of Squares
 
@@ -125,11 +126,12 @@
         SStot = sum( (y-mean)^2 )
     """
     mean = np.mean(y)
-    SStot = np.sum( (y-mean)**2 )
+    SStot = np.sum((y-mean)**2)
 
     rsquare = 1 - (SSerr/SStot)
 
     return rsquare
 
+
 def curve_fitting(x, y):
     """Fits a polynomial curve to the data points defined by the arrays x and y
 
@@ -151,6 +153,7 @@
 
     return poly, deg
 
+
 def generate_poly_label(poly, deg):
     """Returns a string representing the given polynomial
 
@@ -164,6 +167,7 @@
     s += str(poly[deg])
     return s
 
+
 def make_bench_plot(data, fit_key, keys, title, ylabel, scale=1):
     """Creates a matplotlib figure with the benchmark results present in data
 
@@ -186,7 +190,7 @@
     poly, deg = curve_fitting(x, data[fit_key][0])
     poly_label = generate_poly_label(poly, deg)
     y = np.polyval(poly, x2)
-    y = y /scale
+    y = y / scale
     figure = plt.figure()
     ax = figure.add_subplot(111)
     ax.plot(x2, y, 'k', label=poly_label)
@@ -198,12 +202,12 @@
         ax.errorbar(x, y, yerr=y_err, label=key)
     fontP = FontProperties()
     fontP.set_size('small')
-    # figure.legend(loc='best', prop=fontP, fancybox=True).get_frame().set_alpha(0.2)
     figure.suptitle(title)
     ax.set_xlabel('Input file')
     ax.set_ylabel(ylabel)
     return figure, poly_label
 
+
 def process_benchmark_results(input_dir):
     """Processes the benchmark results stored in input_dir
 
@@ -229,6 +233,7 @@
                                          "Memory (GB)", scale=1024*1024)
     return data, time_plot, time_poly, mem_plot, mem_poly
 
+
 def make_comparison_plot(data, x_axis, key, title, ylabel, scale=1):
     """Creates a matplotlib figure with the benchmark results of multiple runs
 
@@ -250,6 +255,7 @@
     ax.set_ylabel(ylabel)
     return figure
 
+
 def compare_benchmark_results(input_dirs, labels):
     """Compares in a single plot the benchmark results listed in input_dirs
 
@@ -258,8 +264,8 @@
         results
     labels: list of strings to label the plot data series
 
-    Note: raises a ValueError if all the benchmark results doesn't belong to the
-    same bench suite
+    Note: raises a ValueError if all the benchmark results doesn't belong to
+    the same bench suite
     """
     # Get the benchmark results
     data = {}
@@ -274,10 +280,11 @@
         else:
             if set(x_axis) != set(d['label']):
                 raise ValueError("In order to compare different benchmark "
-                    "results, they should be over the same set of test cases")
+                                 "results, they should be over the same set of"
+                                 " test cases")
     # Generate comparison plots
     time_fig = make_comparison_plot(data, x_axis, 'wall_time', 'Running time',
                                     'Time (seconds)')
-    mem_fig = make_comparison_plot(data, x_axis, 'memory', 'Memory usage', 
+    mem_fig = make_comparison_plot(data, x_axis, 'memory', 'Memory usage',
                                    'Memory (GB)', scale=1024*1024)
-    return time_fig, mem_fig
\ No newline at end of file
+    return time_fig, mem_fig
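The compute_rsquare docstring touched above defines R-squared as 1 - SSerr/SStot with SStot = sum((y - mean)^2). A small self-contained check of that arithmetic with numpy; the data points are invented:

import numpy as np

y = np.array([2.0, 4.1, 5.9, 8.2])      # observed values
fit = np.array([2.0, 4.0, 6.0, 8.0])    # values predicted by some fit
SSerr = np.sum((y - fit) ** 2)          # sum of squared residuals
SStot = np.sum((y - np.mean(y)) ** 2)
rsquare = 1 - (SSerr / SStot)           # the same formula compute_rsquare applies
print(round(rsquare, 4))                # close to 1 for a good fit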
From f1683fca2282d7a9f9c5c781150c80181183cbc9 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Fri, 14 Feb 2014 21:48:36 -0700
Subject: [PATCH 14/24] adding tests for the commands

---
 tests/test_commands/test_bench_suite_maker.py | 328 +++++++++++++-----
 1 file changed, 234 insertions(+), 94 deletions(-)

diff --git a/tests/test_commands/test_bench_suite_maker.py b/tests/test_commands/test_bench_suite_maker.py
index 9aa3b0e..29fc9c9 100644
--- a/tests/test_commands/test_bench_suite_maker.py
+++ b/tests/test_commands/test_bench_suite_maker.py
@@ -12,95 +12,124 @@
 from pyqi.core.exception import CommandError
 from scaling.commands.bench_suite_maker import BenchSuiteMaker
 
+
 class BenchSuiteMakerTests(TestCase):
-    def setUp(self):
-        """Set up data for use in unit tests"""
-        self.cmd = BenchSuiteMaker()
-
-        self.command = "pick_otus.py"
-        self.command2 = "split_libraries_fastq.py -m mapping.txt"
-        self.command3 = "parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna"
-
-        self.param_single = {'jobs_to_start' : ["8", "16", "32"]}
-        self.param_mult = {'jobs_to_start' : ["8", "16", "32"],
-            'similarity' : ["0.94", "0.97", "0.99"]}
-
-        self.bench_files_single = [["1000000.fna"],
-            ["2000000.fna"],
-            ["3000000.fna"]]
-        self.bench_files_mult = [["reads/1000000.fna",
-                "barcodes/1000000.fna"],
-            ["reads/2000000.fna",
-                "barcodes/2000000.fna"],
-            ["reads/3000000.fna",
-                "barcodes/3000000.fna"]]
-
-        self.in_opts_single = ['-i']
-        self.in_opts_mult = ['-i', '-b']
-
-    def test_single_input_file_suite(self):
-        """Bench suite correctly generated with a single input file command"""
-        obs = self.cmd(command=self.command,
-            bench_files=self.bench_files_single)
-        self.assertEqual(obs.keys(), ['bench_suite'])
-        obs = obs['bench_suite']
-        self.assertEqual(obs, single_file_suite)
-
-        obs = self.cmd(command=self.command,
-            bench_files=self.bench_files_single,
-            in_opts=self.in_opts_single)
-        self.assertEqual(obs.keys(), ['bench_suite'])
-        obs = obs['bench_suite']
-        self.assertEqual(obs, single_file_suite)
-
-    def test_multiple_input_files_suite(self):
-        """Bench suite correctly generated with multiple input files command"""
-        obs = self.cmd(command=self.command2,
-            bench_files=self.bench_files_mult,
-            in_opts=self.in_opts_mult)
-        self.assertEqual(obs.keys(), ['bench_suite'])
-        obs = obs['bench_suite']
-        self.assertEqual(obs, multiple_file_suite)
-
-    def test_single_parameter_suite(self):
-        """Bench suite correctly generated with a single parameter"""
-        obs = self.cmd(command=self.command3,
-            parameters=self.param_single)
-        self.assertEqual(obs.keys(), ['bench_suite'])
-        obs = obs['bench_suite']
-        self.assertEqual(obs, single_parameter_suite)
-
-    def test_multiple_parameter_suite(self):
-        """Bench suite correctly generated with multiple parameters"""
-        obs = self.cmd(command=self.command3,
-            parameters=self.param_mult)
-        self.assertEqual(obs.keys(), ['bench_suite'])
-        obs = obs['bench_suite']
-        self.assertEqual(obs, multiple_parameter_suite)
-
-    def test_invalid_input(self):
-        """Correctly handles invalid input by raising a CommandError."""
-        # Too many options
-        with self.assertRaises(CommandError):
-            _ = self.cmd(command=self.command,
-                parameters=self.param_single,
-                bench_files=self.bench_files_single)
-
-        # Multiple bench files are provided, but only a single in_opt
-        with self.assertRaises(CommandError):
-            _ = self.cmd(command=self.command,
-                bench_files=self.bench_files_mult)
-
-        with self.assertRaises(CommandError):
-            _ = self.cmd(command=self.command,
-                bench_files=self.bench_files_mult,
-                in_opts=self.in_opts_single)
-
-        # Single bench files are provided, but multiple in_opts
-        with self.assertRaises(CommandError):
-            _ = self.cmd(command=self.command,
-                bench_files=self.bench_files_single,
-                in_opts=self.in_opts_mult)
+    def setUp(self):
+        """Set up data for use in unit tests"""
+        self.cmd = BenchSuiteMaker()
+
+        self.command = "pick_otus.py"
+        self.command2 = "split_libraries_fastq.py -m mapping.txt"
+        self.command3 = ("parallel_pick_otus_uclust_ref.py -r ref_file.fna -i "
+                         "input.fna")
+
+        self.param_single = {'jobs_to_start': ["8", "16", "32"]}
+        self.param_mult = {'jobs_to_start': ["8", "16", "32"],
+                           'similarity': ["0.94", "0.97", "0.99"]}
+
+        self.bench_files_single = [["1000000.fna"],
+                                   ["2000000.fna"],
+                                   ["3000000.fna"]]
+        self.bench_files_mult = [["reads/1000000.fna", "barcodes/1000000.fna"],
+                                 ["reads/2000000.fna", "barcodes/2000000.fna"],
+                                 ["reads/3000000.fna", "barcodes/3000000.fna"]]
+
+        self.in_opts_single = ['-i']
+        self.in_opts_mult = ['-i', '-b']
+
+        self.pbs = True
+        self.job_prefix = "test"
+        self.queue = "friendlyq"
+        self.pbs_extra_args = "-m abe"
+
+    def test_single_input_file_suite(self):
+        """Bench suite correctly generated with a single input file command"""
+        obs = self.cmd(command=self.command,
+                       bench_files=self.bench_files_single)
+        self.assertEqual(obs.keys(), ['bench_suite'])
+        obs = obs['bench_suite']
+        self.assertEqual(obs, single_file_suite)
+
+        obs = self.cmd(command=self.command,
+                       bench_files=self.bench_files_single,
+                       in_opts=self.in_opts_single)
+        self.assertEqual(obs.keys(), ['bench_suite'])
+        obs = obs['bench_suite']
+        self.assertEqual(obs, single_file_suite)
+
+    def test_multiple_input_files_suite(self):
+        """Bench suite correctly generated with multiple input files command"""
+        obs = self.cmd(command=self.command2,
+                       bench_files=self.bench_files_mult,
+                       in_opts=self.in_opts_mult)
+        self.assertEqual(obs.keys(), ['bench_suite'])
+        obs = obs['bench_suite']
+        self.assertEqual(obs, multiple_file_suite)
+
+    def test_pbs_files_suite(self):
+        """Bench suite correctly generated in a PBS environment"""
+        obs = self.cmd(command=self.command,
+                       bench_files=self.bench_files_single,
+                       in_opts=self.in_opts_single,
+                       pbs=self.pbs,
+                       job_prefix=self.job_prefix,
+                       queue=self.queue,
+                       pbs_extra_args=self.pbs_extra_args)
+        self.assertEqual(obs.keys(), ['bench_suite'])
+        obs = obs['bench_suite']
+        self.assertEqual(obs, pbs_file_suite)
+
+    def test_single_parameter_suite(self):
+        """Bench suite correctly generated with a single parameter"""
+        obs = self.cmd(command=self.command3,
+                       parameters=self.param_single)
+        self.assertEqual(obs.keys(), ['bench_suite'])
+        obs = obs['bench_suite']
+        self.assertEqual(obs, single_parameter_suite)
+
+    def test_multiple_parameter_suite(self):
+        """Bench suite correctly generated with multiple parameters"""
+        obs = self.cmd(command=self.command3,
+                       parameters=self.param_mult)
+        self.assertEqual(obs.keys(), ['bench_suite'])
+        obs = obs['bench_suite']
+        self.assertEqual(obs, multiple_parameter_suite)
+
+    def test_pbs_parameter_suite(self):
+        """Bench suite correctly generated with parameters in PBS"""
+        obs = self.cmd(command=self.command3,
+                       parameters=self.param_mult,
+                       pbs=self.pbs,
+                       job_prefix=self.job_prefix,
+                       queue=self.queue,
+                       pbs_extra_args=self.pbs_extra_args)
+        self.assertEqual(obs.keys(), ['bench_suite'])
+        obs = obs['bench_suite']
+        self.assertEqual(obs, pbs_parameter_suite)
+
+    def test_invalid_input(self):
+        """Correctly handles invalid input by raising a CommandError."""
+        # Too many options
+        with self.assertRaises(CommandError):
+            _ = self.cmd(command=self.command,
+                         parameters=self.param_single,
+                         bench_files=self.bench_files_single)
+
+        # Multiple bench files are provided, but only a single in_opt
+        with self.assertRaises(CommandError):
+            _ = self.cmd(command=self.command,
+                         bench_files=self.bench_files_mult)
+
+        with self.assertRaises(CommandError):
+            _ = self.cmd(command=self.command,
+                         bench_files=self.bench_files_mult,
+                         in_opts=self.in_opts_single)
+
+        # Single bench files are provided, but multiple in_opts
+        with self.assertRaises(CommandError):
+            _ = self.cmd(command=self.command,
+                         bench_files=self.bench_files_single,
+                         in_opts=self.in_opts_mult)
 
 single_file_suite = """#!/bin/bash
@@ -144,7 +173,7 @@
 done
 
 # Get the benchmark results and produce the plots
-scaling process-bench-results -i $timing_dest/ -o $dest/plots/ 
+scaling process-bench-results -i $timing_dest/ -o $dest/plots/
 """
 
 multiple_file_suite = """#!/bin/bash
@@ -189,7 +218,54 @@
 done
 
 # Get the benchmark results and produce the plots
-scaling process-bench-results -i $timing_dest/ -o $dest/plots/ 
+scaling process-bench-results -i $timing_dest/ -o $dest/plots/
+"""
+
+pbs_file_suite = """#!/bin/bash
+
+# Number of times each command should be executed
+num_rep=1
+
+# Check if the user supplied a (valid) number of repetitions
+if [[ $# -eq 1 ]]; then
+    if [[ $1 =~ ^[0-9]+$ ]]; then
+        num_rep=$1
+    else
+        echo "USAGE: $0 [num_reps]"
+    fi
+fi
+
+# Get a string with current date (format YYYYMMDD_HHMMSS) to name
+# the directory with the benchmark results
+cdate=`date +_%Y%m%d_%H%M%S`
+dest=$PWD/pick_otus$cdate
+mkdir $dest
+
+# Create output directory structure
+output_dest=$dest"/command_outputs"
+timing_dest=$dest"/timing"
+
+mkdir $output_dest
+mkdir $timing_dest
+mkdir $output_dest/1000000
+mkdir $timing_dest/1000000
+mkdir $output_dest/2000000
+mkdir $timing_dest/2000000
+mkdir $output_dest/3000000
+mkdir $timing_dest/3000000
+scaling_jobs=""
+# Loop as many times as desired
+for i in `seq $num_rep`
+do
+    # benchmarking commands:
+    scaling_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/1000000/$i.txt pick_otus.py -i 1000000.fna -o $output_dest/1000000/$i" | qsub -k oe -N test0 -q friendlyq -m abe`
+    scaling_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/2000000/$i.txt pick_otus.py -i 2000000.fna -o $output_dest/2000000/$i" | qsub -k oe -N test1 -q friendlyq -m abe`
+    scaling_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/3000000/$i.txt pick_otus.py -i 3000000.fna -o $output_dest/3000000/$i" | qsub -k oe -N test2 -q friendlyq -m abe`
+done
+
+# Get the benchmark results and produce the plots
+scaling_jobs=${scaling_jobs#?}
+scaling process-bench-results -i $timing_dest/ -o $dest/plots/ -w $scaling_jobs
 """
 
 single_parameter_suite = """#!/bin/bash
@@ -237,7 +313,7 @@
 
 # Get the benchmark results and produce the plots
 mkdir $dest/plots
-scaling process-bench-results -i $timing_dest/jobs_to_start -o $dest/plots/jobs_to_start 
+scaling process-bench-results -i $timing_dest/jobs_to_start -o $dest/plots/jobs_to_start
 """
 
 multiple_parameter_suite = """#!/bin/bash
@@ -296,9 +372,73 @@
 
 # Get the benchmark results and produce the plots
 mkdir $dest/plots
-scaling process-bench-results -i $timing_dest/jobs_to_start -o $dest/plots/jobs_to_start 
-scaling process-bench-results -i $timing_dest/similarity -o $dest/plots/similarity 
+scaling process-bench-results -i $timing_dest/jobs_to_start -o $dest/plots/jobs_to_start
+scaling process-bench-results -i $timing_dest/similarity -o $dest/plots/similarity
+"""
+
+pbs_parameter_suite = """#!/bin/bash
+
+# Number of times each command should be executed
+num_rep=1
+
+# Check if the user supplied a (valid) number of repetitions
+if [[ $# -eq 1 ]]; then
+    if [[ $1 =~ ^[0-9]+$ ]]; then
+        num_rep=$1
+    else
+        echo "USAGE: $0 [num_reps]"
+    fi
+fi
+
+# Get a string with current date (format YYYYMMDD_HHMMSS) to name
+# the directory with the benchmark results
+cdate=`date +_%Y%m%d_%H%M%S`
+dest=$PWD/parallel_pick_otus_uclust_ref$cdate
+mkdir $dest
+
+# Create output directory structure
+output_dest=$dest"/command_outputs"
+timing_dest=$dest"/timing"
+
+mkdir $output_dest
+mkdir $timing_dest
+mkdir $output_dest/jobs_to_start
+mkdir $timing_dest/jobs_to_start
+mkdir $output_dest/jobs_to_start/8
+mkdir $timing_dest/jobs_to_start/8
+mkdir $output_dest/jobs_to_start/16
+mkdir $timing_dest/jobs_to_start/16
+mkdir $output_dest/jobs_to_start/32
+mkdir $timing_dest/jobs_to_start/32
+mkdir $output_dest/similarity
+mkdir $timing_dest/similarity
+mkdir $output_dest/similarity/0.94
+mkdir $timing_dest/similarity/0.94
+mkdir $output_dest/similarity/0.97
+mkdir $timing_dest/similarity/0.97
+mkdir $output_dest/similarity/0.99
+mkdir $timing_dest/similarity/0.99
+jobs_to_start_jobs=""
+similarity_jobs=""
+# Loop as many times as desired
+for i in `seq $num_rep`
+do
+    # benchmarking commands:
+    jobs_to_start_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/jobs_to_start/8/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --jobs_to_start 8 -o $output_dest/jobs_to_start/8/$i" | qsub -k oe -N test0 -q friendlyq -m abe`
+    jobs_to_start_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/jobs_to_start/16/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --jobs_to_start 16 -o $output_dest/jobs_to_start/16/$i" | qsub -k oe -N test1 -q friendlyq -m abe`
+    jobs_to_start_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/jobs_to_start/32/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --jobs_to_start 32 -o $output_dest/jobs_to_start/32/$i" | qsub -k oe -N test2 -q friendlyq -m abe`
+    similarity_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/similarity/0.94/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --similarity 0.94 -o $output_dest/similarity/0.94/$i" | qsub -k oe -N test3 -q friendlyq -m abe`
+    similarity_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/similarity/0.97/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --similarity 0.97 -o $output_dest/similarity/0.97/$i" | qsub -k oe -N test4 -q friendlyq -m abe`
+    similarity_jobs+=","`echo "cd $PWD; timing_wrapper.sh $timing_dest/similarity/0.99/$i.txt parallel_pick_otus_uclust_ref.py -r ref_file.fna -i input.fna --similarity 0.99 -o $output_dest/similarity/0.99/$i" | qsub -k oe -N test5 -q friendlyq -m abe`
+done
+
+# Get the benchmark results and produce the plots
+mkdir $dest/plots
+jobs_to_start_jobs=${jobs_to_start_jobs#?}
+similarity_jobs=${similarity_jobs#?}
+scaling process-bench-results -i $timing_dest/jobs_to_start -o $dest/plots/jobs_to_start -w $jobs_to_start_jobs
+scaling process-bench-results -i $timing_dest/similarity -o $dest/plots/similarity -w $similarity_jobs
 """
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()

From a69bc0c23fc3daa5c114f958a7c79f8de9a4e56d Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Sat, 15 Feb 2014 11:13:01 -0700
Subject: [PATCH 15/24] Fixes issue #18

---
 scaling/interfaces/optparse/input_handler.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/scaling/interfaces/optparse/input_handler.py b/scaling/interfaces/optparse/input_handler.py
index 2ce59bf..a7b7528 100644
--- a/scaling/interfaces/optparse/input_handler.py
+++ b/scaling/interfaces/optparse/input_handler.py
@@ -11,7 +11,10 @@
 
 from os import listdir
 from os.path import abspath, join
+
 from scaling.parse import parse_parameters_file
+from scaling.util import natural_sort
+
 
 def load_parameters(param_fp):
     """Return a parsed parameters file"""
@@ -20,6 +23,7 @@
         return parse_parameters_file(param_f)
     return None
 
+
 def get_bench_paths(input_dirs):
     """Goes through the item in each directory and returns their path
 
@@ -37,7 +41,7 @@
         same number of items
     """
     bench_paths_by_dir = []
-    
+
     # Loop through the list of directories
     for input_dir in input_dirs:
         # Get the contents of the current folder
@@ -46,17 +50,17 @@
         # Add the folder to the paths, we get absolute paths already
         paths = map(join, [input_dir] * len(paths), paths)
         bench_paths_by_dir.append(paths)
-    
+
     # Check that all the input folders contain the same number of items
     n = len(bench_paths_by_dir[0])
     if not all(len(x) == n for x in bench_paths_by_dir):
         raise ValueError("All the input directories should contain the same "
-            "number of items.")
-    
+                         "number of items.")
+
     # Sort all the lists. It is assumed that all the file or directory names
     # present on such directories match across benchmark folders
-    map(sorted, bench_paths_by_dir)
-    
+    bench_paths_by_dir = map(natural_sort, bench_paths_by_dir)
+
     # Group the files in different folders by their name matching
     bench_files = []
     for i in range(len(bench_paths_by_dir[0])):
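PATCH 15 replaces map(sorted, bench_paths_by_dir), which built sorted copies and discarded them while also ordering lexicographically, with an assignment that keeps naturally sorted lists, so the benchmark files line up by size across input directories. A quick illustration of the difference; the file names are invented:

paths = ["300.fna", "2000.fna", "10000.fna"]
print(sorted(paths))  # ['10000.fna', '2000.fna', '300.fna'] - lexicographic
# natural_sort (scaling/util.py) compares the digit runs as integers instead,
# yielding ['300.fna', '2000.fna', '10000.fna']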
From ca7f12dc860109e276a1a4ac3b18b1d8562f89e0 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Sat, 15 Feb 2014 11:17:32 -0700
Subject: [PATCH 16/24] Fixing typo

---
 scaling/interfaces/optparse/config/process_bench_results.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scaling/interfaces/optparse/config/process_bench_results.py b/scaling/interfaces/optparse/config/process_bench_results.py
index 9caa0f2..ab2495d 100644
--- a/scaling/interfaces/optparse/config/process_bench_results.py
+++ b/scaling/interfaces/optparse/config/process_bench_results.py
@@ -51,7 +51,7 @@
                    Name='wait_on',
                    Required=False,
                    Help='Comma-separated list of job ids to wait for before '
-                   'processing the results')
+                   'processing the results'),
     OptparseOption(Parameter=None,
                    Type='new_dirpath',
                    ShortName='o',

From a091210d587ff0a7f3f072f7641a6c5c6670e35b Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Sat, 15 Feb 2014 11:24:19 -0700
Subject: [PATCH 17/24] Fixing subprocess communication

---
 scaling/cluster_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scaling/cluster_util.py b/scaling/cluster_util.py
index 22dfaa5..2a54491 100644
--- a/scaling/cluster_util.py
+++ b/scaling/cluster_util.py
@@ -27,7 +27,7 @@ def check_status(jobs_to_monitor):
     # Get all the commands running pf the current user
     user = os.environ['USER']
     qstat_cmd = "qstat | grep %s" % user
-    proc = subprocess.Popen(qstat_cmd, )
+    proc = subprocess.Popen(qstat_cmd, stout=subprocess.PIPE)
     (stdout, stderr) = proc.communicate()
     # Parse the qstat output
     lines = stdout.splitlines()

From 1dab36e9989cc02c6dc9693a472f84b0f62468b8 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Sat, 15 Feb 2014 11:25:55 -0700
Subject: [PATCH 18/24] Fixing typo

---
 scaling/cluster_util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scaling/cluster_util.py b/scaling/cluster_util.py
index 2a54491..d4f6f5b 100644
--- a/scaling/cluster_util.py
+++ b/scaling/cluster_util.py
@@ -27,7 +27,7 @@ def check_status(jobs_to_monitor):
     # Get all the commands running pf the current user
     user = os.environ['USER']
     qstat_cmd = "qstat | grep %s" % user
-    proc = subprocess.Popen(qstat_cmd, stout=subprocess.PIPE)
+    proc = subprocess.Popen(qstat_cmd, stdout=subprocess.PIPE)
     (stdout, stderr) = proc.communicate()
     # Parse the qstat output
     lines = stdout.splitlines()

From d0a909725226553d0d2a86ab8c9c3ddb40fb3c67 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Sat, 15 Feb 2014 11:39:46 -0700
Subject: [PATCH 19/24] Fixing cluster utils

---
 scaling/cluster_util.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scaling/cluster_util.py b/scaling/cluster_util.py
index d4f6f5b..21389cf 100644
--- a/scaling/cluster_util.py
+++ b/scaling/cluster_util.py
@@ -27,7 +27,8 @@ def check_status(jobs_to_monitor):
     # Get all the commands running pf the current user
     user = os.environ['USER']
     qstat_cmd = "qstat | grep %s" % user
-    proc = subprocess.Popen(qstat_cmd, stdout=subprocess.PIPE)
+    proc = subprocess.Popen(qstat_cmd, stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE, shell=True)
     (stdout, stderr) = proc.communicate()
     # Parse the qstat output
     lines = stdout.splitlines()

From 30c50b4cee2f5f7f64b1ab44968eadd9d76718ab Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Sat, 15 Feb 2014 12:14:14 -0700
Subject: [PATCH 20/24] Fixing cluster utils

---
 scaling/cluster_util.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scaling/cluster_util.py b/scaling/cluster_util.py
index 21389cf..287799b 100644
--- a/scaling/cluster_util.py
+++ b/scaling/cluster_util.py
@@ -35,10 +35,11 @@
     running_jobs = []
     for l in lines:
         job_id, job_name, user, time, status, queue = l.split()
+        job_id = job_id.split('.')[0]
         # Check if this job is one of the jobs that we have to
         # monitor and check if it is running or queued
         if job_id in jobs_to_monitor and status in ['R', 'Q']:
-            running_jobs.append()
+            running_jobs.append(job_id)
 
     # Return the list with the running jobs that we're still waiting for
     return running_jobs
@@ -50,6 +51,8 @@
         jobs_to_monitor: list of job ids
         poll_interval: interval between checks, in seconds
     """
+    # Get the jobs ids by up to the first '.' character
+    jobs_to_monitor = [job.split('.')[0] for job in jobs_to_monitor]
     # Loop until there is some job to monitor
     while jobs_to_monitor:
         # Sleep before new job status check

From 34988861a5ca14fde2ab0f69c9435802751350ee Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Mon, 17 Feb 2014 12:07:05 -0700
Subject: [PATCH 21/24] Fixing @antgonza comments

---
 scaling/cluster_util.py               | 10 +++++-----
 scaling/commands/bench_suite_maker.py |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/scaling/cluster_util.py b/scaling/cluster_util.py
index 21389cf..64766a0 100644
--- a/scaling/cluster_util.py
+++ b/scaling/cluster_util.py
@@ -9,8 +9,8 @@
 __email__ = "josenavasmolina@gmail.com"
 __status__ = "Development"
 
-import subprocess
-import os
+from subprocess import Popen
+from os import environ
 from time import sleep
 
@@ -25,10 +25,10 @@
     running
     """
     # Get all the commands running pf the current user
-    user = os.environ['USER']
+    user = environ['USER']
     qstat_cmd = "qstat | grep %s" % user
-    proc = subprocess.Popen(qstat_cmd, stdout=subprocess.PIPE,
-                            stderr=subprocess.PIPE, shell=True)
+    proc = Popen(qstat_cmd, stdout=subprocess.PIPE,
+                 stderr=subprocess.PIPE, shell=True)
     (stdout, stderr) = proc.communicate()
     # Parse the qstat output
     lines = stdout.splitlines()

diff --git a/scaling/commands/bench_suite_maker.py b/scaling/commands/bench_suite_maker.py
index 890954c..3b9f704 100644
--- a/scaling/commands/bench_suite_maker.py
+++ b/scaling/commands/bench_suite_maker.py
@@ -20,9 +20,9 @@
 class BenchSuiteMaker(Command):
     BriefDescription = "Generates a benchmark suite file"
     LongDescription = ("Given a command and a list of benchmarks files or a "
-                       "dictionary with the options to test, this command "
-                       "generates a shell script that executes a complete "
-                       "benchmark suite.")
+                       "dictionary with the options to test, %prog generates a"
+                       " shell script that executes a complete benchmark "
+                       "suite.")
     CommandIns = ParameterCollection([
         CommandIn(Name='command', DataType=str,
                   Description='command to benchmark', Required=True),
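PATCH 20 starts matching on the numeric part of the job id because qsub prints ids of the form <number>.<server>, while callers may pass either form. A small sketch of the per-line parsing check_status performs; the sample qstat line is invented but follows the six whitespace-separated fields the code unpacks, and the real column layout can vary between PBS installs:

# Hypothetical qstat output line for illustration only.
line = "124311.master  bench_0  jdoe  00:01:02  R  friendlyq"
job_id, job_name, user, time, status, queue = line.split()
job_id = job_id.split('.')[0]          # keep only the numeric prefix
if job_id in ['124311'] and status in ['R', 'Q']:
    print("still waiting on job", job_id)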
From 5835c801da01272f87985de1ec6e177e7d24dac5 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Mon, 17 Feb 2014 13:19:06 -0700
Subject: [PATCH 22/24] Addressing PEP8 issues and improving documentation

---
 scaling/commands/bench_results_comparator.py  | 18 ++++----
 scaling/commands/bench_suite_maker.py         |  5 +-
 .../optparse/config/compare_bench_results.py  | 46 +++++++++++--------
 .../optparse/config/process_bench_results.py  | 19 ++++++--
 scaling/interfaces/optparse/output_handler.py | 29 ++++++------
 scaling/parse.py                              |  3 +-
 scaling/process_results.py                    |  7 +--
 scaling/util.py                               | 18 +++++---
 8 files changed, 87 insertions(+), 58 deletions(-)

diff --git a/scaling/commands/bench_results_comparator.py b/scaling/commands/bench_results_comparator.py
index 69ad397..6871360 100644
--- a/scaling/commands/bench_results_comparator.py
+++ b/scaling/commands/bench_results_comparator.py
@@ -10,18 +10,19 @@
 __email__ = "josenavasmolina@gmail.com"
 __status__ = "Development"
 
-from pyqi.core.command import (Command, CommandIn, CommandOut, 
+from pyqi.core.command import (Command, CommandIn, CommandOut,
                                ParameterCollection)
 from pyqi.core.exception import CommandError
 from scaling.process_results import compare_benchmark_results
 from matplotlib.figure import Figure
 
+
 class BenchResultsComparator(Command):
     BriefDescription = "Compare different runs results of the same bench suite"
     LongDescription = ("Takes a list with paths to directories with benchmark "
-        "results and generates a plot with the wall time and a plot with the "
-        "memory consumption of the different runs, allowing performance "
-        "comparison between them.")
+                       "results and generates a plot with the wall time and a "
+                       "plot with the memory consumption of the different "
+                       "runs, allowing performance comparison between them.")
     CommandIns = ParameterCollection([
         CommandIn(Name='input_dirs', DataType=list,
                   Description='List with the path to the directories with the '
@@ -34,9 +35,10 @@
 
     CommandOuts = ParameterCollection([
         CommandOut(Name="time_fig", DataType=Figure,
-            Description="matplotlib figure with the wall time plot"),
+                   Description="matplotlib figure with the wall time plot"),
         CommandOut(Name="mem_fig", DataType=Figure,
-            Description="matplotlib figure with the memory consumption plot"),
+                   Description="matplotlib figure with the memory consumption "
+                   "plot"),
     ])
 
     def run(self, **kwargs):
@@ -47,13 +49,13 @@
 
         if len(input_dirs) < 2:
             raise CommandError("You should provide at least two directories "
-                "with the benchmark results")
+                               "with the benchmark results")
 
         time_fig, mem_fig = compare_benchmark_results(input_dirs, labels)
 
         result['time_fig'] = time_fig
         result['mem_fig'] = mem_fig
-        
+
         return result
 
 CommandConstructor = BenchResultsComparator

diff --git a/scaling/commands/bench_suite_maker.py b/scaling/commands/bench_suite_maker.py
index 3b9f704..d3d11d3 100644
--- a/scaling/commands/bench_suite_maker.py
+++ b/scaling/commands/bench_suite_maker.py
@@ -20,9 +20,8 @@
 class BenchSuiteMaker(Command):
     BriefDescription = "Generates a benchmark suite file"
     LongDescription = ("Given a command and a list of benchmarks files or a "
-                       "dictionary with the options to test, %prog generates a"
-                       " shell script that executes a complete benchmark "
-                       "suite.")
+                       "dictionary with the options to test, generates a shell"
+                       " script that executes a complete benchmark suite.")
     CommandIns = ParameterCollection([
         CommandIn(Name='command', DataType=str,
                   Description='command to benchmark', Required=True),

diff --git a/scaling/interfaces/optparse/config/compare_bench_results.py b/scaling/interfaces/optparse/config/compare_bench_results.py
index 0a51240..d0fd052 100644
--- a/scaling/interfaces/optparse/config/compare_bench_results.py
+++ b/scaling/interfaces/optparse/config/compare_bench_results.py
@@ -15,6 +15,7 @@
 from pyqi.core.command import (make_command_in_collection_lookup_f,
                                make_command_out_collection_lookup_f)
 from pyqi.core.interfaces.optparse.input_handler import string_list_handler
+
 from scaling.commands.bench_results_comparator import CommandConstructor
 from scaling.interfaces.optparse.output_handler import write_matplotlib_figure
 
@@ -25,9 +26,14 @@
 # Examples of how the command can be used from the command line using an
 # optparse interface.
 usage_examples = [
-    OptparseUsageExample(ShortDesc="A short single sentence description of the example",
-                         LongDesc="A longer, more detailed description",
-                         Ex="%prog --foo --bar some_file")
+    OptparseUsageExample(ShortDesc="Compare different runs results of the same"
+                         " bench suite",
+                         LongDesc="Takes a comma-separated list with paths to "
+                         "directories with benchmark results and generates a "
+                         "plot with the wall time and a plot with the memory "
+                         "consumption of the different runs, allowing "
+                         "performance comparison between them.",
+                         Ex="%prog -i timing1,timing2 -l run1,run2 -o plots")
 ]
 
 # inputs map command line arguments and values onto Parameters. It is possible
@@ -35,21 +41,23 @@
 inputs = [
     OptparseOption(Parameter=cmd_in_lookup('input_dirs'),
                    Type='existing_dirpaths',
-                   Action='store', # default is 'store', change if desired
-                   Handler=None, # must be defined if desired
-                   ShortName='i', # must be defined if desired
-                   # Name='input_dirs', # implied by Parameter
-                   # Required=True, # implied by Parameter
-                   # Help='List with the path to the directories with the time results of different runs of the same bench suite', # implied by Parameter
+                   Action='store',
+                   Handler=None,
+                   ShortName='i',
+                   # Name='input_dirs',
+                   # Required=True,
+                   # Help='List with the path to the directories with the time
+                   # results of different runs of the same bench suite',
                    ),
     OptparseOption(Parameter=cmd_in_lookup('labels'),
                    Type='str',
-                   Action='store', # default is 'store', change if desired
-                   Handler=string_list_handler, # must be defined if desired
-                   ShortName='l', # must be defined if desired
-                   # Name='labels', # implied by Parameter
-                   # Required=True, # implied by Parameter
-                   # Help='List of strings to label each data series on the plot', # implied by Parameter
+                   Action='store',
+                   Handler=string_list_handler,
+                   ShortName='l',
+                   # Name='labels',
+                   # Required=True,
+                   # Help='List of strings to label each data series on the
+                   # plot'
                    ),
     OptparseOption(Parameter=None,
                    Type='new_dirpath',
@@ -64,9 +72,9 @@
 # inputs list (above).
 outputs = [
     OptparseResult(Parameter=cmd_out_lookup('mem_fig'),
-        Handler=write_matplotlib_figure,
-        InputName='output-dir'),
+                   Handler=write_matplotlib_figure,
+                   InputName='output-dir'),
     OptparseResult(Parameter=cmd_out_lookup('time_fig'),
-        Handler=write_matplotlib_figure,
-        InputName='output-dir'),
+                   Handler=write_matplotlib_figure,
+                   InputName='output-dir'),
 ]

diff --git a/scaling/interfaces/optparse/config/process_bench_results.py b/scaling/interfaces/optparse/config/process_bench_results.py
index ab2495d..e5396e5 100644
--- a/scaling/interfaces/optparse/config/process_bench_results.py
+++ b/scaling/interfaces/optparse/config/process_bench_results.py
@@ -15,9 +15,10 @@
 from pyqi.core.command import (make_command_in_collection_lookup_f,
                                make_command_out_collection_lookup_f)
 from pyqi.core.interfaces.optparse.input_handler import string_list_handler
+
 from scaling.commands.bench_results_processer import CommandConstructor
 from scaling.interfaces.optparse.output_handler import \
-    (write_summarized_results, write_matplotlib_figure, write_string_to_dir)
+    write_summarized_results, write_matplotlib_figure, write_string_to_dir
 
 # Convenience function for looking up parameters by name.
 cmd_in_lookup = make_command_in_collection_lookup_f(CommandConstructor)
@@ -26,9 +27,19 @@
 # Examples of how the command can be used from the command line using an
 # optparse interface.
 usage_examples = [
-    OptparseUsageExample(ShortDesc="A short single sentence description of the example",
-                         LongDesc="A longer, more detailed description",
-                         Ex="%prog --foo --bar some_file")
+    OptparseUsageExample(ShortDesc="Processes the benchmark suite results",
+                         LongDesc="Takes the benchmark suite output directory "
+                         "and processes the benchmark measurements, creating "
+                         "plots and collapsing results in a usable form.",
+                         Ex="%prog -i timing -o plots"),
+    OptparseUsageExample(ShortDesc="Wait for a set of PBS jobs to complete and"
+                         " process the benchmark suite results",
+                         LongDesc="Takes a list of PBS job ids, wait for its "
+                         "completion and then takes the benchmark suite output"
+                         " directory and processes the benchmark measurements,"
+                         " creating plots and collapsing results in a usable "
+                         "form.",
+                         Ex="%prog -i timing -o plots -w 124311,124312,124313")
 ]
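Both config files lean on pyqi's string_list_handler to turn the raw option string into a list before the command sees it. As an illustration only, not pyqi's actual source, the behaviour these configs rely on amounts to something like:

def comma_list(option_value=None):
    # Split "124311,124312" into ['124311', '124312']; pass None through.
    if option_value is None:
        return None
    return option_value.split(',')

print(comma_list("124311,124312"))  # ['124311', '124312']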
 # inputs map command line arguments and values onto Parameters. It is possible

diff --git a/scaling/interfaces/optparse/output_handler.py b/scaling/interfaces/optparse/output_handler.py
index 8d4224c..e1b1b13 100644
--- a/scaling/interfaces/optparse/output_handler.py
+++ b/scaling/interfaces/optparse/output_handler.py
@@ -10,19 +10,20 @@
 __status__ = "Development"
 
 from pyqi.core.exception import IncompetentDeveloperError
-from pyqi.core.interfaces.optparse.output_handler import (write_list_of_strings,
-    write_string)
+from pyqi.core.interfaces.optparse.output_handler import \
+    write_list_of_strings, write_string
 
 import os
 import numpy as np
 
+
 def write_summarized_results(result_key, data, option_value=None):
     """Write the benchmark results in a tab-delimited format
 
     option_value is the base output directory
 
     Writes a file with the benchmark results in a tab-delimited form,
-    with the following headers: label, wall_mean, wall_std, user_mean, user_std,
-    kernel_mean, kernel_std, mem_mean, mem_std
+    with the following headers: label, wall_mean, wall_std, user_mean,
+    user_std, kernel_mean, kernel_std, mem_mean, mem_std
 
     Each row contains the results for a single experiment
     """
@@ -32,8 +33,8 @@
 
     if os.path.exists(option_value):
         if os.path.isfile(option_value):
-            raise IOError("Output directory '%s' already exists and it is a file."
-                % option_value)
+            raise IOError("Output directory '%s' already exists and it is a "
+                          "file." % option_value)
     else:
         os.mkdir(option_value)
 
@@ -41,7 +42,7 @@
     lines = []
     headers = ["#label", "wall_mean", "wall_std", "user_mean", "user_std",
-        "kernel_mean", "kernel_std", "mem_mean", "mem_std"]
+               "kernel_mean", "kernel_std", "mem_mean", "mem_std"]
     lines.append("\t".join(headers))
     # Loop over all the experiments
     for i, label in enumerate(data['label']):
@@ -55,9 +56,10 @@
         values.append(str(data['memory'][0][i]))
         values.append(str(data['memory'][1][i]))
         lines.append("\t".join(values))
-    
+
     write_list_of_strings(result_key, lines, option_value=output_fp)
 
+
 def write_matplotlib_figure(result_key, data, option_value=None):
     """Write a matplotlib figure to disk
 
@@ -69,8 +71,8 @@
 
     if os.path.exists(option_value):
         if os.path.isfile(option_value):
-            raise IOError("Output directory '%s' already exists and it is a file."
-                % option_value)
+            raise IOError("Output directory '%s' already exists and it is a "
+                          "file." % option_value)
     else:
         os.mkdir(option_value)
 
@@ -80,6 +82,7 @@
 
     data.savefig(output_fp)
 
+
 def write_string_to_dir(result_key, data, option_value=None):
     """Write a string to a file
 
@@ -91,10 +94,10 @@
 
     if os.path.exists(option_value):
         if os.path.isfile(option_value):
-            raise IOError("Output directory '%s' already exists and it is a file."
-                % option_value)
+            raise IOError("Output directory '%s' already exists and it is a "
+                          "file." % option_value)
     else:
         os.mkdir(option_value)
 
     output_fp = os.path.join(option_value, "%s.txt" % result_key)
-    write_string(result_key, data, option_value=output_fp)
\ No newline at end of file
+    write_string(result_key, data, option_value=output_fp)

diff --git a/scaling/parse.py b/scaling/parse.py
index 9fde875..543315a 100644
--- a/scaling/parse.py
+++ b/scaling/parse.py
@@ -9,6 +9,7 @@
 __email__ = "josenavasmolina@gmail.com"
 __status__ = "Development"
 
+
 def parse_parameters_file(lines):
     """Parses the parameters file encoded in lines and returns it as a dict
 
@@ -27,4 +28,4 @@
         if line:
             (param, values) = line.split('\t')
             param_dict[param] = values.split(',')
-    return param_dict
\ No newline at end of file
+    return param_dict

diff --git a/scaling/process_results.py b/scaling/process_results.py
index 35be2b2..a85124e 100644
--- a/scaling/process_results.py
+++ b/scaling/process_results.py
@@ -13,11 +13,12 @@
 from os.path import join, isdir, exists
 import numpy as np
 from matplotlib import use
-use('Agg',warn=False)
+use('Agg', warn=False)
 import matplotlib.pyplot as plt
 from matplotlib.font_manager import FontProperties
 from scaling.util import natural_sort
 
+
 def process_timing_directory(timing_dir):
     """Retrieves the timing results stored in timing_dir in a dict form
 
@@ -56,8 +57,8 @@
         dirpath = join(timing_dir, dirname)
         # Check if it is not a directory - raise a ValueError if True
         if not isdir(dirpath):
-            raise ValueError, "%s contains a file: %s." % (timing_dir,
-                dirpath) + "Only directories are allowed!"
+            raise ValueError("%s contains a file: %s. Only directories are "
+                             "allowed!" % (timing_dir, dirpath))
 
         # Initialize lists for bench results
         wall_time = []
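parse_parameters_file, shown above getting its PEP8 touch-up, reads one tab-separated pair per line: a parameter name, then a comma-separated list of values to benchmark. A worked example of the format and the resulting dict; the parameter names are taken from the tests earlier in the series:

lines = ["jobs_to_start\t8,16,32", "similarity\t0.94,0.97,0.99"]
param_dict = {}
for line in lines:
    line = line.strip()
    if line:
        param, values = line.split('\t')
        param_dict[param] = values.split(',')
print(param_dict)
# {'jobs_to_start': ['8', '16', '32'], 'similarity': ['0.94', '0.97', '0.99']}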
diff --git a/scaling/util.py b/scaling/util.py
index 8a2ae1f..bd9b6e8 100644
--- a/scaling/util.py
+++ b/scaling/util.py
@@ -11,26 +11,30 @@
 
 import sys
 from StringIO import StringIO
-import re
+from re import split
+
 
 class OutputRedirect:
     """Class to redirect the std output to StringIO using a `with` statement"""
     saved_stdout = None
+
     def __enter__(self):
         self.saved_stdout = sys.stdout
         out = StringIO()
         sys.stdout = out
         return out
+
     def __exit__(self, type, value, tb):
         sys.stdout = self.saved_stdout
 
-def natural_sort( l ):
+
+def natural_sort(l):
     """ Sort the given list in the way that humans expect.
 
     Code adapted from:
         http://www.codinghorror.com/blog/2007/12/
             sorting-for-humans-natural-sort-order.html
-    """ 
-    convert = lambda text: int(text) if text.isdigit() else text
-    alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ]
-    l.sort( key=alphanum_key )
-    return l
\ No newline at end of file
+    """
+    convert = lambda text: int(text) if text.isdigit() else text
+    alphanum_key = lambda key: [convert(c) for c in split('([0-9]+)', key)]
+    l.sort(key=alphanum_key)
+    return l

From 5dbbf3fd54bedfb6554b9cead8f3b661417e0baa Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Thu, 20 Feb 2014 18:10:31 -0700
Subject: [PATCH 23/24] Fixing imports

---
 scaling/cluster_util.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/scaling/cluster_util.py b/scaling/cluster_util.py
index 01fabf4..6271067 100644
--- a/scaling/cluster_util.py
+++ b/scaling/cluster_util.py
@@ -9,7 +9,7 @@
 __email__ = "josenavasmolina@gmail.com"
 __status__ = "Development"
 
-from subprocess import Popen
+from subprocess import Popen, PIPE
 from os import environ
 from time import sleep
 
@@ -27,8 +27,7 @@
     # Get all the commands running pf the current user
     user = environ['USER']
     qstat_cmd = "qstat | grep %s" % user
-    proc = Popen(qstat_cmd, stdout=subprocess.PIPE,
-                 stderr=subprocess.PIPE, shell=True)
+    proc = Popen(qstat_cmd, stdout=PIPE, stderr=PIPE, shell=True)
     (stdout, stderr) = proc.communicate()
     # Parse the qstat output
     lines = stdout.splitlines()

From ff0dd82aa42e6bf746dd9f7baaa4fd61f07cb5a5 Mon Sep 17 00:00:00 2001
From: Jose Navas
Date: Fri, 21 Feb 2014 21:38:54 -0700
Subject: [PATCH 24/24] Fixing default value

---
 scaling/interfaces/optparse/config/process_bench_results.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scaling/interfaces/optparse/config/process_bench_results.py b/scaling/interfaces/optparse/config/process_bench_results.py
index e5396e5..5303607 100644
--- a/scaling/interfaces/optparse/config/process_bench_results.py
+++ b/scaling/interfaces/optparse/config/process_bench_results.py
@@ -61,6 +61,7 @@
                    ShortName='w',
                    Name='wait_on',
                    Required=False,
+                   Default="",
                    Help='Comma-separated list of job ids to wait for before '
                    'processing the results'),
     OptparseOption(Parameter=None,
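With the series applied, wait_on defaults to an empty string, so results processing starts immediately unless job ids are supplied with -w. A final sanity check of the cleaned-up natural_sort helper the suite depends on; the example list is invented, and note that the function sorts in place and returns the same list:

from re import split

def natural_sort(l):
    # Compare digit runs as integers so "run10" sorts after "run2".
    convert = lambda text: int(text) if text.isdigit() else text
    alphanum_key = lambda key: [convert(c) for c in split('([0-9]+)', key)]
    l.sort(key=alphanum_key)
    return l

print(natural_sort(["run10", "run2", "run1"]))  # ['run1', 'run2', 'run10']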