From 854dc0c2545400eee5c2c5e0463e2477093a7639 Mon Sep 17 00:00:00 2001
From: stxue1
Date: Mon, 15 Jul 2024 11:54:14 -0700
Subject: [PATCH] Prepull containers (docker and singularity) by default

---
 requirements-cwl.txt    |  1 +
 src/toil/cwl/cwltoil.py | 27 +++++++++++++++++++++++++++
 src/toil/options/cwl.py |  7 +++++++
 3 files changed, 35 insertions(+)

diff --git a/requirements-cwl.txt b/requirements-cwl.txt
index fe0eefe9a5..b4c94e0f9d 100644
--- a/requirements-cwl.txt
+++ b/requirements-cwl.txt
@@ -6,3 +6,4 @@ ruamel.yaml>=0.15,<=0.19
 ruamel.yaml.clib>=0.2.6
 networkx!=2.8.1,<4
 CacheControl[filecache]
+cwl-utils==0.33
\ No newline at end of file
diff --git a/src/toil/cwl/cwltoil.py b/src/toil/cwl/cwltoil.py
index bdf2eeaa41..99e5061b15 100644
--- a/src/toil/cwl/cwltoil.py
+++ b/src/toil/cwl/cwltoil.py
@@ -100,6 +100,7 @@
 from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
 from toil.common import Toil, addOptions
 from toil.cwl import check_cwltool_version
+from toil.lib.misc import call_command
 from toil.provisioners.clusterScaler import JobTooBigError
 
 check_cwltool_version()
@@ -233,6 +234,28 @@ def ensure_no_collisions(
             seen_names.add(wanted_name)
 
 
+def try_prepull(cwl_tool_uri: str, runtime_context: cwltool.context.RuntimeContext, batchsystem: str) -> None:
+    """
+    Try to prepull all containers in a CWL workflow with Singularity or Docker.
+    This will not prepull the default container specified on the command line.
+    :param cwl_tool_uri: CWL workflow URL. Fragments are accepted as well
+    :param runtime_context: runtime context of cwltool
+    :param batchsystem: type of Toil batchsystem
+    :return:
+    """
+    if runtime_context.singularity:
+        if "CWL_SINGULARITY_CACHE" in os.environ:
+            logger.info("Prepulling containers with Singularity...")
+            call_command(["cwl-docker-extract", "--singularity", "--dir", os.environ['CWL_SINGULARITY_CACHE'], cwl_tool_uri])
+    elif not runtime_context.user_space_docker_cmd and not runtime_context.podman:
+        # For udocker and podman prefetching is unimplemented
+        # This is docker
+        if batchsystem == "single_machine":
+            # Only on single machine will the docker daemon be accessible by all workers and the leader
+            logger.info("Prepulling containers with Docker...")
+            call_command(["cwl-docker-extract", cwl_tool_uri])
+
+
 class Conditional:
     """
     Object holding conditional expression until we are ready to evaluate it.
@@ -3725,6 +3748,10 @@ def main(args: Optional[List[str]] = None, stdout: TextIO = sys.stdout) -> int:
                 )
                 raise
 
+            # Attempt to prepull the containers
+            if not options.no_prepull:
+                try_prepull(uri, runtime_context, toil.config.batchSystem)
+
             options.tool_help = None
             options.debug = options.logLevel == "DEBUG"
             job_order_object, options.basedir, jobloader = cwltool.main.load_job_order(
diff --git a/src/toil/options/cwl.py b/src/toil/options/cwl.py
index ed3f44e7a2..0db2c80889 100644
--- a/src/toil/options/cwl.py
+++ b/src/toil/options/cwl.py
@@ -110,6 +110,13 @@ def add_cwl_options(parser: ArgumentParser, suppress: bool = True) -> None:
         dest="cidfile_prefix",
     )
 
+    parser.add_argument(
+        "--no-prepull",
+        action="store_true",
+        default=False,
+        help=suppress_help or "Do not prepull the container prior to running the workflow",
+    )
+
     parser.add_argument(
         "--preserve-environment",
         type=str,