From 33fa295bba79f46e5cf82a46aa715cd9b0361a63 Mon Sep 17 00:00:00 2001 From: stxue1 Date: Wed, 25 Sep 2024 17:04:24 -0700 Subject: [PATCH 1/8] mirror import on workers update to wdl --- src/toil/options/wdl.py | 16 ++++++++++ src/toil/wdl/wdltoil.py | 68 +++++++++++++++++++++++++++++++++-------- 2 files changed, 71 insertions(+), 13 deletions(-) diff --git a/src/toil/options/wdl.py b/src/toil/options/wdl.py index 09d6a9ecfe..b4b4baf13b 100644 --- a/src/toil/options/wdl.py +++ b/src/toil/options/wdl.py @@ -1,6 +1,7 @@ from argparse import ArgumentParser from configargparse import SUPPRESS +from toil.lib.conversions import human2bytes def add_wdl_options(parser: ArgumentParser, suppress: bool = True) -> None: @@ -35,3 +36,18 @@ def add_wdl_options(parser: ArgumentParser, suppress: bool = True) -> None: container_arguments = ["--wdlContainer"] + (["--container"] if not suppress else []) parser.add_argument(*container_arguments, dest="container", type=str, choices=["singularity", "docker", "auto"], default="auto", help=suppress_help or "Container engine to use to run WDL tasks") + + parser.add_argument( + "--runImportsOnWorkers", "--run-imports-on-workers", + action="store_true", default=False, + help=suppress_help or "Run the file imports on a worker instead of the leader. This is useful if the leader is not optimized for high network performance." + "If set to true, the argument --importWorkersDisk must also be set.", + dest="run_imports_on_workers" + ) + + parser.add_argument("--importWorkersDisk", "--import-workers-disk", + help=suppress_help or "Specify the amount of disk space an import worker will use. If file streaming for input files is not available," + "this should be set to the size of the largest input file. This must be set in conjunction with the argument runImportsOnWorkers.", + dest="import_workers_disk", + type=lambda x: human2bytes(str(x)), + default=None) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index e19f596cb0..b52c2e393e 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -54,7 +54,7 @@ import WDL.Error import WDL.runtime.config -from configargparse import ArgParser +from configargparse import ArgParser, Namespace from WDL._util import byte_size_units from WDL.Tree import ReadSourceResult from WDL.CLI import print_error @@ -74,7 +74,7 @@ TemporaryID, parse_accelerator, unwrap, - unwrap_all) + unwrap_all, ParseableIndivisibleResource) from toil.jobStores.abstractJobStore import (AbstractJobStore, UnimplementedURLException, InvalidImportExportUrlException, LocatorException) from toil.lib.accelerators import get_individual_local_accelerators @@ -332,7 +332,7 @@ def first_mismatch(prefix: str, value: str) -> int: return modified -def potential_absolute_uris(uri: str, path: List[str], importer: Optional[WDL.Tree.Document] = None) -> Iterator[str]: +def potential_absolute_uris(uri: str, path: List[str], importer: Optional[WDL.Tree.Document] = None, execution_dir: Optional[str] = None) -> Iterator[str]: """ Get potential absolute URIs to check for an imported file. @@ -374,7 +374,8 @@ def potential_absolute_uris(uri: str, path: List[str], importer: Optional[WDL.Tr full_path_list.append(Toil.normalize_uri(importer.pos.abspath)) # Then the current directory. We need to make sure to include a filename component here or it will treat the current directory with no trailing / as a document and relative paths will look 1 level up. - full_path_list.append(Toil.normalize_uri('.') + '/.') + # When importing on a worker, the cwd will be a tmpdir and will result in FileNotFoundError after os.path.abspath, so override with the execution dir + full_path_list.append(Toil.normalize_uri(execution_dir or '.') + '/.') # Then the specified paths. # TODO: @@ -1449,7 +1450,8 @@ def add_paths(task_container: TaskContainer, host_paths: Iterable[str]) -> None: task_container.input_path_map[host_path] = container_path task_container.input_path_map_rev[container_path] = host_path -def import_files(environment: WDLBindings, task_path: str, toil: Toil, path: Optional[List[str]] = None, skip_remote: bool = False) -> WDLBindings: +def import_files(environment: WDLBindings, task_path: str, jobstore: AbstractJobStore, path: Optional[List[str]] = None, skip_remote: bool = False, + execution_dir: Optional[str] = None) -> WDLBindings: """ Make sure all File values embedded in the given bindings are imported, using the given Toil object. @@ -1471,7 +1473,7 @@ def import_file_from_uri(uri: str) -> str: """ tried = [] - for candidate_uri in potential_absolute_uris(uri, path if path is not None else []): + for candidate_uri in potential_absolute_uris(uri, path if path is not None else [], execution_dir=execution_dir): # Try each place it could be according to WDL finding logic. tried.append(candidate_uri) try: @@ -1487,7 +1489,7 @@ def import_file_from_uri(uri: str) -> str: # Actually import # Try to import the file. Don't raise if we can't find it, just # return None! - imported = toil.import_file(candidate_uri, check_existence=False) + imported = jobstore.import_file(candidate_uri) if imported is None: # Wasn't found there continue @@ -3454,6 +3456,32 @@ def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: self.defer_postprocessing(job) return job.rv() +# @report_wdl_errors("run wdl import on worker", exit=True) +class WDLImportJob(WDLSectionJob): + def __init__(self, target: Union[WDL.Tree.Workflow, WDL.Tree.Task], inputs: WDLBindings, path: Optional[List[str]] = None, skip_remote: bool = False, + disk_size: Optional[ParseableIndivisibleResource] = None, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any): + """ + Job to take the inputs from the WDL workflow and import them on a worker instead of a leader. Assumes all local and cloud files are accessible. + + This class is only used when runImportsOnWorkers is enabled. + """ + super().__init__(target.name, target.name, wdl_options=wdl_options, local=False, + disk=disk_size, **kwargs) + self._target = target + self._inputs = inputs + self._path = path + self._skip_remote = skip_remote + + def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: + """ + Import the workflow inputs and then create and run the workflow. + :return: Promise of workflow outputs + """ + input_bindings = import_files(self._inputs, self._target.name, file_store.jobStore, self._path, self._skip_remote, self._wdl_options.get("execution_dir")) + root_job = WDLRootJob(self._target, input_bindings, wdl_options=self._wdl_options) + self.addChild(root_job) + return root_job.rv() + @contextmanager def monkeypatch_coerce(standard_library: ToilWDLStdLibBase) -> Generator[None, None, None]: @@ -3497,6 +3525,20 @@ def coerce(self: SelfType, desired_type: Optional[WDL.Type.Base] = None) -> WDL. WDL.Value.Base.coerce = old_base_coerce # type: ignore[method-assign] WDL.Value.String.coerce = old_str_coerce # type: ignore[method-assign] + +def make_root_job(target: Union[WDL.Tree.Workflow, WDL.Tree.Task], inputs: WDLBindings, inputs_search_path: List[str], toil: Toil, wdl_options: Optional[Dict[str, str]], options: Namespace) -> WDLSectionJob: + if options.run_imports_on_workers: + # Run WDL imports on a worker instead + root_job: WDLSectionJob = WDLImportJob(target, inputs, inputs_search_path, skip_remote=options.reference_inputs, disk_size=options.import_workers_disk, wdl_options=wdl_options) + else: + # Run WDL imports on leader + # Import any files in the bindings + input_bindings = import_files(inputs, target.name, toil._jobStore, inputs_search_path, skip_remote=options.reference_inputs) + # Run the workflow and get its outputs namespaced with the workflow name. + root_job = WDLRootJob(target, input_bindings, wdl_options=wdl_options) + return root_job + + @report_wdl_errors("run workflow", exit=True) def main() -> None: """ @@ -3514,6 +3556,11 @@ def main() -> None: # TODO: Move cwltoil's generate_default_job_store where we can use it options.jobStore = os.path.join(mkdtemp(), 'tree') + # Take care of incompatible arguments related to file imports + if options.run_imports_on_workers is True and options.import_workers_disk is None: + logger.error("Commandline arguments --runImportsOnWorkers and --importWorkersDisk must both be set to run file imports on workers.") + return + # Make sure we have an output directory (or URL prefix) and we don't need # to ever worry about a None, and MyPy knows it. # If we don't have a directory assigned, make one in the current directory. @@ -3586,9 +3633,6 @@ def main() -> None: logger.info("Inputs appear to come from a Github repository; adding repository root to file search path") inputs_search_path.append(match.group(0)) - # Import any files in the bindings - input_bindings = import_files(input_bindings, target.name, toil, inputs_search_path, skip_remote=options.reference_inputs) - # TODO: Automatically set a good MINIWDL__SINGULARITY__IMAGE_CACHE ? # Get the execution directory @@ -3599,9 +3643,7 @@ def main() -> None: wdl_options["execution_dir"] = execution_dir wdl_options["container"] = options.container assert wdl_options.get("container") is not None - - # Run the workflow and get its outputs namespaced with the workflow name. - root_job = WDLRootJob(target, input_bindings, wdl_options=wdl_options) + root_job = make_root_job(target, input_bindings, inputs_search_path, toil, wdl_options, options) output_bindings = toil.start(root_job) if not isinstance(output_bindings, WDL.Env.Bindings): raise RuntimeError("The output of the WDL job is not a binding.") From a46a001c572d5ed4913794f4ddcc00f98937c57e Mon Sep 17 00:00:00 2001 From: stxue1 <122345910+stxue1@users.noreply.github.com> Date: Fri, 27 Sep 2024 12:42:37 -0700 Subject: [PATCH 2/8] Apply suggestions from code review Co-authored-by: Adam Novak --- src/toil/wdl/wdltoil.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index 71af553d2f..d8efd2dcf7 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -74,7 +74,8 @@ TemporaryID, parse_accelerator, unwrap, - unwrap_all, ParseableIndivisibleResource) + unwrap_all, + ParseableIndivisibleResource) from toil.jobStores.abstractJobStore import (AbstractJobStore, UnimplementedURLException, InvalidImportExportUrlException, LocatorException) from toil.lib.accelerators import get_individual_local_accelerators @@ -3469,7 +3470,6 @@ def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: self.defer_postprocessing(job) return job.rv() -# @report_wdl_errors("run wdl import on worker", exit=True) class WDLImportJob(WDLSectionJob): def __init__(self, target: Union[WDL.Tree.Workflow, WDL.Tree.Task], inputs: WDLBindings, path: Optional[List[str]] = None, skip_remote: bool = False, disk_size: Optional[ParseableIndivisibleResource] = None, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any): From 413e3eaa272da89e2daec3cc9fbcdd624d2a9908 Mon Sep 17 00:00:00 2001 From: stxue1 Date: Fri, 27 Sep 2024 12:57:17 -0700 Subject: [PATCH 3/8] Rename WDLRootJob, remove kebab case from WDL, and raise an error if incompatible options are detected --- src/toil/options/wdl.py | 19 ++++++------------- src/toil/wdl/wdltoil.py | 9 ++++----- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/src/toil/options/wdl.py b/src/toil/options/wdl.py index 9e1e78c595..1a9283c11a 100644 --- a/src/toil/options/wdl.py +++ b/src/toil/options/wdl.py @@ -38,19 +38,12 @@ def add_wdl_options(parser: ArgumentParser, suppress: bool = True) -> None: container_arguments = ["--wdlContainer"] + (["--container"] if not suppress else []) parser.add_argument(*container_arguments, dest="container", type=str, choices=["singularity", "docker", "auto"], default="auto", help=suppress_help or "Container engine to use to run WDL tasks") - parser.add_argument( - "--runImportsOnWorkers", "--run-imports-on-workers", - action="store_true", default=False, - help=suppress_help or "Run the file imports on a worker instead of the leader. This is useful if the leader is not optimized for high network performance." - "If set to true, the argument --importWorkersDisk must also be set.", - dest="run_imports_on_workers" - ) - parser.add_argument("--importWorkersDisk", "--import-workers-disk", - help=suppress_help or "Specify the amount of disk space an import worker will use. If file streaming for input files is not available," - "this should be set to the size of the largest input file. This must be set in conjunction with the argument runImportsOnWorkers.", - dest="import_workers_disk", - type=lambda x: human2bytes(str(x)), - default=None) + parser.add_argument("--runImportsOnWorkers", action="store_true", default=False, dest="run_imports_on_workers", + help=suppress_help or "Run the file imports on a worker instead of the leader. This is useful if the leader is not optimized for high network performance. " + "If set to true, the argument --importWorkersDisk must also be set.") + parser.add_argument("--importWorkersDisk", dest="import_workers_disk", type=lambda x: human2bytes(str(x)), default=None, + help=suppress_help or "Specify the amount of disk space an import worker will use. If file streaming for input files is not available, " + "this should be set to the size of the largest input file. This must be set in conjunction with the argument runImportsOnWorkers.") all_call_outputs_arguments = ["--wdlAllCallOutputs"] + (["--allCallOutputs"] if not suppress else []) parser.add_argument(*all_call_outputs_arguments, dest="all_call_outputs", type=strtobool, default=None, help=suppress_help or "Keep and return all call outputs as workflow outputs") diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index 88411c5a69..99c2475e25 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -3438,7 +3438,7 @@ def run(self, file_store: AbstractFileStore) -> WDLBindings: return self.postprocess(output_bindings) -class WDLRootJob(WDLSectionJob): +class WDLStartJob(WDLSectionJob): """ Job that evaluates an entire WDL workflow, and returns the workflow outputs namespaced with the workflow name. Inputs may or may not be namespaced with @@ -3495,7 +3495,7 @@ def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]: :return: Promise of workflow outputs """ input_bindings = import_files(self._inputs, self._target.name, file_store.jobStore, self._path, self._skip_remote, self._wdl_options.get("execution_dir")) - root_job = WDLRootJob(self._target, input_bindings, wdl_options=self._wdl_options) + root_job = WDLStartJob(self._target, input_bindings, wdl_options=self._wdl_options) self.addChild(root_job) return root_job.rv() @@ -3552,7 +3552,7 @@ def make_root_job(target: Union[WDL.Tree.Workflow, WDL.Tree.Task], inputs: WDLBi # Import any files in the bindings input_bindings = import_files(inputs, target.name, toil._jobStore, inputs_search_path, skip_remote=options.reference_inputs) # Run the workflow and get its outputs namespaced with the workflow name. - root_job = WDLRootJob(target, input_bindings, wdl_options=wdl_options) + root_job = WDLStartJob(target, input_bindings, wdl_options=wdl_options) return root_job @@ -3575,8 +3575,7 @@ def main() -> None: # Take care of incompatible arguments related to file imports if options.run_imports_on_workers is True and options.import_workers_disk is None: - logger.error("Commandline arguments --runImportsOnWorkers and --importWorkersDisk must both be set to run file imports on workers.") - return + raise RuntimeError("Commandline arguments --runImportsOnWorkers and --importWorkersDisk must both be set to run file imports on workers.") # Make sure we have an output directory (or URL prefix) and we don't need # to ever worry about a None, and MyPy knows it. From 0fcdf35603d09382fa3a072f7ecde3f2e501f5e6 Mon Sep 17 00:00:00 2001 From: stxue1 Date: Fri, 27 Sep 2024 19:34:41 -0700 Subject: [PATCH 4/8] Move out shared runner options --- src/toil/common.py | 19 ++++++++++++++----- src/toil/options/cwl.py | 16 ---------------- src/toil/options/wdl.py | 7 ------- 3 files changed, 14 insertions(+), 28 deletions(-) diff --git a/src/toil/common.py b/src/toil/common.py index 62a052be9f..8f45ee8c4c 100644 --- a/src/toil/common.py +++ b/src/toil/common.py @@ -56,6 +56,7 @@ from toil.options.common import add_base_toil_options, JOBSTORE_HELP from toil.options.cwl import add_cwl_options +from toil.options.runner import add_runner_options from toil.options.wdl import add_wdl_options if sys.version_info >= (3, 8): @@ -582,6 +583,12 @@ def create_config_dict_from_parser(parser: ArgumentParser) -> CommentedMap: "`toil config ~/.toil/default.yaml`.\n\nBASE TOIL OPTIONS\n") all_data.append(toil_base_data) + parser = ArgParser(YAMLConfigFileParser()) + add_runner_options(parser) + toil_cwl_data = create_config_dict_from_parser(parser) + toil_cwl_data.yaml_set_start_comment("\nTOIL SHARED CWL AND WDL RUNNER OPTIONS") + all_data.append(toil_cwl_data) + parser = ArgParser(YAMLConfigFileParser()) add_cwl_options(parser) toil_cwl_data = create_config_dict_from_parser(parser) @@ -616,10 +623,10 @@ def create_config_dict_from_parser(parser: ArgumentParser) -> CommentedMap: def parser_with_common_options( - provisioner_options: bool = False, - jobstore_option: bool = True, - prog: Optional[str] = None, - default_log_level: Optional[int] = None + provisioner_options: bool = False, + jobstore_option: bool = True, + prog: Optional[str] = None, + default_log_level: Optional[int] = None ) -> ArgParser: parser = ArgParser(prog=prog or "Toil", formatter_class=ArgumentDefaultsHelpFormatter) @@ -694,6 +701,8 @@ def addOptions(parser: ArgumentParser, jobstore_as_flag: bool = False, cwl: bool # This is done so the config file can hold all available options add_cwl_options(parser, suppress=not cwl) add_wdl_options(parser, suppress=not wdl) + # Add shared runner options + add_runner_options(parser) def check_arguments(typ: str) -> None: """ @@ -707,6 +716,7 @@ def check_arguments(typ: str) -> None: add_cwl_options(check_parser) if typ == "cwl": add_wdl_options(check_parser) + add_runner_options(check_parser) for action in check_parser._actions: action.default = SUPPRESS other_options, _ = check_parser.parse_known_args(sys.argv[1:], ignore_help_args=True) @@ -912,7 +922,6 @@ def start(self, rootJob: "Job") -> Any: # Check that the rootJob has been initialized rootJob.check_initialized() - # Write shared files to the job store self._jobStore.write_leader_pid() self._jobStore.write_leader_node_id() diff --git a/src/toil/options/cwl.py b/src/toil/options/cwl.py index a606db34e5..0db2c80889 100644 --- a/src/toil/options/cwl.py +++ b/src/toil/options/cwl.py @@ -2,7 +2,6 @@ from argparse import ArgumentParser from configargparse import SUPPRESS -from toil.lib.conversions import human2bytes from toil.version import baseVersion @@ -282,21 +281,6 @@ def add_cwl_options(parser: ArgumentParser, suppress: bool = True) -> None: help=suppress_help or "Disable file streaming for files that have 'streamable' flag True", dest="disable_streaming", ) - parser.add_argument( - "--runImportsOnWorkers", "--run-imports-on-workers", - action="store_true", - default=False, - help=suppress_help or "Run the file imports on a worker instead of the leader. This is useful if the leader is not optimized for high network performance." - "If set to true, the argument --importWorkersDisk must also be set.", - dest="run_imports_on_workers" - ) - - parser.add_argument("--importWorkersDisk", "--import-workers-disk", - help=suppress_help or "Specify the amount of disk space an import worker will use. If file streaming for input files is not available," - "this should be set to the size of the largest input file. This must be set in conjunction with the argument runImportsOnWorkers.", - dest="import_workers_disk", - type=lambda x: human2bytes(str(x)), - default=None) provgroup = parser.add_argument_group( "Options for recording provenance information of the execution" diff --git a/src/toil/options/wdl.py b/src/toil/options/wdl.py index 1a9283c11a..6c40cf0e07 100644 --- a/src/toil/options/wdl.py +++ b/src/toil/options/wdl.py @@ -1,7 +1,6 @@ from argparse import ArgumentParser from configargparse import SUPPRESS -from toil.lib.conversions import human2bytes from toil.lib.conversions import strtobool @@ -38,12 +37,6 @@ def add_wdl_options(parser: ArgumentParser, suppress: bool = True) -> None: container_arguments = ["--wdlContainer"] + (["--container"] if not suppress else []) parser.add_argument(*container_arguments, dest="container", type=str, choices=["singularity", "docker", "auto"], default="auto", help=suppress_help or "Container engine to use to run WDL tasks") - parser.add_argument("--runImportsOnWorkers", action="store_true", default=False, dest="run_imports_on_workers", - help=suppress_help or "Run the file imports on a worker instead of the leader. This is useful if the leader is not optimized for high network performance. " - "If set to true, the argument --importWorkersDisk must also be set.") - parser.add_argument("--importWorkersDisk", dest="import_workers_disk", type=lambda x: human2bytes(str(x)), default=None, - help=suppress_help or "Specify the amount of disk space an import worker will use. If file streaming for input files is not available, " - "this should be set to the size of the largest input file. This must be set in conjunction with the argument runImportsOnWorkers.") all_call_outputs_arguments = ["--wdlAllCallOutputs"] + (["--allCallOutputs"] if not suppress else []) parser.add_argument(*all_call_outputs_arguments, dest="all_call_outputs", type=strtobool, default=None, help=suppress_help or "Keep and return all call outputs as workflow outputs") From 361551d6636ba30c7b42fba3aeab8ee447ce0e21 Mon Sep 17 00:00:00 2001 From: stxue1 Date: Mon, 30 Sep 2024 15:02:08 -0700 Subject: [PATCH 5/8] Add runner.py --- src/toil/options/runner.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 src/toil/options/runner.py diff --git a/src/toil/options/runner.py b/src/toil/options/runner.py new file mode 100644 index 0000000000..bb82cdab02 --- /dev/null +++ b/src/toil/options/runner.py @@ -0,0 +1,25 @@ +from argparse import ArgumentParser +from toil.lib.conversions import human2bytes + + +def add_runner_options(parser: ArgumentParser, cwl: bool = False, wdl: bool = False) -> None: + """ + Add to the WDL or CWL runners options that are shared or the same between runners + :param parser: parser to add arguments to + :param cwl: bool + :param wdl: bool + :return: None + """ + # This function should be constructed so that even when wdl and cwl are false, the "default" options are still added + run_imports_on_workers_arguments = ["--runImportsOnWorkers"] + if cwl: + run_imports_on_workers_arguments.append("--run-imports-on-workers") + parser.add_argument(*run_imports_on_workers_arguments, action="store_true", default=False, dest="run_imports_on_workers", + help="Run the file imports on a worker instead of the leader. This is useful if the leader is not optimized for high network performance. " + "If set to true, the argument --importWorkersDisk must also be set.") + import_workers_disk_arguments = ["--importWorkersDisk"] + if cwl: + import_workers_disk_arguments.append("--import-workers-disk") + parser.add_argument(*import_workers_disk_arguments, dest="import_workers_disk", type=lambda x: human2bytes(str(x)), default=None, + help="Specify the amount of disk space an import worker will use. If file streaming for input files is not available, " + "this should be set to the size of the largest input file. This must be set in conjunction with the argument runImportsOnWorkers.") From 932932752a9c47beadc093797e4dcdfb087fea99 Mon Sep 17 00:00:00 2001 From: stxue1 Date: Thu, 3 Oct 2024 12:04:13 -0700 Subject: [PATCH 6/8] Add logDebug to giraffe workflow --- src/toil/test/wdl/wdltoil_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/toil/test/wdl/wdltoil_test.py b/src/toil/test/wdl/wdltoil_test.py index c7ba29626c..c135ca7d85 100644 --- a/src/toil/test/wdl/wdltoil_test.py +++ b/src/toil/test/wdl/wdltoil_test.py @@ -400,7 +400,7 @@ def test_giraffe(self): json_file = f"{base_uri}/params/giraffe.json" result_json = subprocess.check_output( - self.base_command + [wdl_file, json_file, '-o', self.output_dir, '--outputDialect', 'miniwdl', '--scale', + self.base_command + [wdl_file, json_file, '-o', self.output_dir, '--outputDialect', 'miniwdl', '--scale', '--logDebug', '0.1']) result = json.loads(result_json) From 881ab9ab38291ea7cb34349921a10e5400397b8d Mon Sep 17 00:00:00 2001 From: stxue1 Date: Thu, 3 Oct 2024 12:14:08 -0700 Subject: [PATCH 7/8] Add toil.import_file logic into the wdl import function --- src/toil/wdl/wdltoil.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/toil/wdl/wdltoil.py b/src/toil/wdl/wdltoil.py index f65cd487dd..4e1367602b 100755 --- a/src/toil/wdl/wdltoil.py +++ b/src/toil/wdl/wdltoil.py @@ -791,10 +791,10 @@ def is_toil_url(filename: str) -> bool: def is_standard_url(filename: str) -> bool: - return is_url(filename, ['http:', 'https:', 's3:', 'gs:']) + return is_url(filename, ['http:', 'https:', 's3:', 'gs:', 'ftp:']) -def is_url(filename: str, schemes: List[str]=['http:', 'https:', 's3:', 'gs:', TOIL_URI_SCHEME]) -> bool: +def is_url(filename: str, schemes: List[str]=['http:', 'https:', 's3:', 'gs:', 'ftp:', TOIL_URI_SCHEME]) -> bool: """ Decide if a filename is a known kind of URL """ @@ -843,12 +843,8 @@ def import_filename(filename: str) -> Tuple[Optional[str], Optional[str]]: return candidate_uri, None else: # Actually import - # Try to import the file. Don't raise if we can't find it, just - # return None! + # Try to import the file. If we can't find it, continue imported = file_source.import_file(candidate_uri) - if imported is None: - # Wasn't found there - continue except UnimplementedURLException as e: # We can't find anything that can even support this URL scheme. # Report to the user, they are probably missing an extra. @@ -859,6 +855,9 @@ def import_filename(filename: str) -> Tuple[Optional[str], Optional[str]]: logger.warning("Checked URL %s but got HTTP status %s", candidate_uri, e.code) # Try the next location. continue + except FileNotFoundError: + # Wasn't found there + continue except Exception: # Something went wrong besides the file not being found. Maybe # we have no auth. From 3ba03919d97fc5160784cb7956083eb5634cc802 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Mon, 7 Oct 2024 11:54:28 -0400 Subject: [PATCH 8/8] Reconnect test command argument to its option --- src/toil/test/wdl/wdltoil_test.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/toil/test/wdl/wdltoil_test.py b/src/toil/test/wdl/wdltoil_test.py index c135ca7d85..0dadefb479 100644 --- a/src/toil/test/wdl/wdltoil_test.py +++ b/src/toil/test/wdl/wdltoil_test.py @@ -400,8 +400,18 @@ def test_giraffe(self): json_file = f"{base_uri}/params/giraffe.json" result_json = subprocess.check_output( - self.base_command + [wdl_file, json_file, '-o', self.output_dir, '--outputDialect', 'miniwdl', '--scale', '--logDebug', - '0.1']) + self.base_command + [ + wdl_file, + json_file, + '-o', + self.output_dir, + '--outputDialect', + 'miniwdl', + '--scale', + '0.1', + '--logDebug', + ] + ) result = json.loads(result_json) # Expect MiniWDL-style output with a designated "dir"