Skip to content

Commit

Permalink
Merge pull request #97 from RIVM-bioinformatics/containers
Browse files Browse the repository at this point in the history
Containerization of workflow
  • Loading branch information
florianzwagemaker authored May 6, 2024
2 parents 7216e7b + ea17e19 commit 7679e8e
Show file tree
Hide file tree
Showing 25 changed files with 1,007 additions and 102 deletions.
92 changes: 92 additions & 0 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# CI workflow: builds the workflow containers in one job, hands them over as
# an artifact, and prepares them for testing in a second job.
name: build containers and run tests

# TODO: change this to a pull_request trigger that only targets the main branch
# Currently runs for pull requests against any branch ('*').
on:
  pull_request:
    branches:
      - '*'

jobs:
  # Job 1: build the containers and upload them as the "built_containers" artifact.
  Setup_and_build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      # Create the micromamba environment described in env.yml (cached between runs).
      - name: Setup Mamba
        uses: mamba-org/setup-micromamba@v1
        with:
          cache-environment: true
          post-cleanup: 'all'
          environment-file: env.yml
          init-shell: bash

      # --no-deps: install only the local package itself, without its dependencies.
      - name: Install local python package
        run: |
          pip install . --no-deps
        shell: micromamba-shell {0}

      # The build script receives the workflow's GITHUB_TOKEN through the TOKEN env var.
      - name: build containers
        run: |
          python containers/build_containers.py
        env:
          TOKEN: ${{ secrets.GITHUB_TOKEN }}
        shell: micromamba-shell {0}

      # Bundle builtcontainers.json and every *.tar file found below ./containers/
      # into one archive. '%f' prints bare file names only.
      # NOTE(review): this appears to rely on the .tar files living directly in ./containers/ - confirm.
      - name: zip built containers
        run: |
          cd ./containers/
          tar -czvf containers.tar.gz builtcontainers.json $(find . -type f -name "*.tar" -printf '%f ')
      - name: Upload container artifacts
        uses: actions/upload-artifact@v3
        with:
          name: built_containers
          path: ./containers/containers.tar.gz

  # Job 2: runs after Setup_and_build; fetches the artifact and prepares the containers.
  Test:
    runs-on: ubuntu-latest
    needs: Setup_and_build
    steps:
      - uses: actions/checkout@v3

      # Fetch the archive uploaded by the Setup_and_build job.
      - uses: actions/download-artifact@v3
        with:
          name: built_containers

      # Put the archive back into ./containers/ before extracting it there.
      - name: move artifact
        run: |
          mv ./containers.tar.gz ./containers/containers.tar.gz
      - name: unzip built containers
        run: |
          cd ./containers/
          tar -xzvf containers.tar.gz
          cd ..
      - name: Setup Apptainer
        uses: eWaterCycle/setup-apptainer@v2

      # Same environment setup as in the Setup_and_build job.
      - name: Setup Mamba
        uses: mamba-org/setup-micromamba@v1
        with:
          cache-environment: true
          post-cleanup: 'all'
          environment-file: env.yml
          init-shell: bash

      - name: Install local python package
        run: |
          pip install . --no-deps
        shell: micromamba-shell {0}

      # NOTE(review): unlike the other python steps, this one sets no
      # `shell: micromamba-shell {0}`, so it runs in the runner's default shell - confirm this is intended.
      - name: convert containers
        run: |
          python containers/convert_artifact_containers_for_apptainer.py
      - name: download existing containers
        run: |
          python containers/pull_published_containers.py
        shell: micromamba-shell {0}

      ## rest of the testing suite here
67 changes: 67 additions & 0 deletions .github/workflows/publish_containers.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Publishing workflow: takes the container artifact produced by
# build_and_test.yml, loads the containers into Docker and pushes them to ghcr.io.
name: Publish containers

# TODO: change this to an on-release trigger
# Currently runs on every push to the dev branch.
on:
  push:
    branches:
      - dev


jobs:
  Upload:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      # Fetch the "built_containers" artifact from a run of build_and_test.yml.
      # NOTE(review): with skip_unpack: true the artifact is not extracted; confirm
      # that the downloaded file really is ./containers.tar.gz as the next step expects.
      - name: Download artifact
        id: download-artifact
        uses: dawidd6/action-download-artifact@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          workflow: build_and_test.yml
          name: built_containers
          skip_unpack: true

      # Put the archive into ./containers/ before extracting it there.
      - name: move artifact
        run: |
          mv ./containers.tar.gz ./containers/containers.tar.gz
      # Recursive listing of the workspace; output only, nothing is modified.
      - name: list directory contents
        run: |
          ls -R ./
      - name: unzip built containers
        run: |
          cd ./containers/
          tar -xzvf containers.tar.gz
          cd ..
      # Create the micromamba environment described in env.yml (cached between runs).
      - name: Setup Mamba
        uses: mamba-org/setup-micromamba@v1
        with:
          cache-environment: true
          post-cleanup: 'all'
          environment-file: env.yml
          init-shell: bash

      # --no-deps: install only the local package itself, without its dependencies.
      - name: Install local python package
        run: |
          pip install . --no-deps
        shell: micromamba-shell {0}

      # Authenticate against ghcr.io as the actor that triggered the run,
      # using the workflow's GITHUB_TOKEN as the password.
      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Add artifacted containers to docker daemon
        run: |
          python containers/add_OCI_to_docker_engine.py
        shell: micromamba-shell {0}

      - name: tag and push containers
        run: |
          python containers/tag_and_push_containers.py
        shell: micromamba-shell {0}
36 changes: 29 additions & 7 deletions ViroConstrictor/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
from ViroConstrictor.runconfigs import GetSnakemakeRunDetails, WriteYaml
from ViroConstrictor.runreport import WriteReport
from ViroConstrictor.update import update
from ViroConstrictor.workflow.containers import (
construct_container_bind_args,
download_containers,
)


def get_preset_warning_list(
Expand Down Expand Up @@ -151,7 +155,12 @@ def main() -> NoReturn:
inputs_obj=parsed_input, samplesheetfilename="samples_main"
)

if download_containers(snakemake_run_details.snakemake_run_conf) != 0:
log.error("Failed to download containers required for workflow.\nPlease check the logs and your settings for more information and try again later.")
sys.exit(1)

log.info(f"{'='*20} [bold yellow] Starting Main Workflow [/bold yellow] {'='*20}")

status: bool = False
if parsed_input.user_config["COMPUTING"]["compmode"] == "local":
status = snakemake.snakemake(
Expand All @@ -160,22 +169,28 @@ def main() -> NoReturn:
cores=snakemake_run_details.snakemake_run_conf["cores"],
use_conda=snakemake_run_details.snakemake_run_conf["use-conda"],
conda_frontend="mamba",
use_singularity=snakemake_run_details.snakemake_run_conf["use-singularity"],
singularity_args=construct_container_bind_args(parsed_input.samples_dict),
jobname=snakemake_run_details.snakemake_run_conf["jobname"],
latency_wait=snakemake_run_details.snakemake_run_conf["latency-wait"],
dryrun=snakemake_run_details.snakemake_run_conf["dryrun"],
configfiles=[
WriteYaml(
snakemake_run_details.snakemake_run_parameters,
f"{parsed_input.workdir}/config/run_params.yaml",
)
),
# The run configuration must go to its own file: writing it to
# run_params.yaml would overwrite the run parameters written by the
# preceding WriteYaml call and pass the same path to Snakemake twice.
WriteYaml(
    snakemake_run_details.snakemake_run_conf,
    f"{parsed_input.workdir}/config/run_configs.yaml",
),
],
restart_times=3,
keepgoing=True,
restart_times=snakemake_run_details.snakemake_run_conf["restart-times"],
keepgoing=snakemake_run_details.snakemake_run_conf["keep-going"],
quiet=["all"], # type: ignore
log_handler=[
ViroConstrictor.logging.snakemake_logger(logfile=parsed_input.logfile)
],
printshellcmds=False,
printshellcmds=snakemake_run_details.snakemake_run_conf["printshellcmds"],
)
if parsed_input.user_config["COMPUTING"]["compmode"] == "grid":
status = snakemake.snakemake(
Expand All @@ -185,6 +200,8 @@ def main() -> NoReturn:
nodes=snakemake_run_details.snakemake_run_conf["cores"],
use_conda=snakemake_run_details.snakemake_run_conf["use-conda"],
conda_frontend="mamba",
use_singularity=snakemake_run_details.snakemake_run_conf["use-singularity"],
singularity_args=construct_container_bind_args(parsed_input.samples_dict),
jobname=snakemake_run_details.snakemake_run_conf["jobname"],
latency_wait=snakemake_run_details.snakemake_run_conf["latency-wait"],
drmaa=snakemake_run_details.snakemake_run_conf["drmaa"],
Expand All @@ -194,14 +211,19 @@ def main() -> NoReturn:
WriteYaml(
snakemake_run_details.snakemake_run_parameters,
f"{parsed_input.workdir}/config/run_params.yaml",
)
),
WriteYaml(
snakemake_run_details.snakemake_run_conf,
f"{parsed_input.workdir}/config/run_configs.yaml",
),
],
restart_times=3,
keepgoing=True,
restart_times=snakemake_run_details.snakemake_run_conf["restart-times"],
keepgoing=snakemake_run_details.snakemake_run_conf["keep-going"],
quiet=["all"], # type: ignore
log_handler=[
ViroConstrictor.logging.snakemake_logger(logfile=parsed_input.logfile)
],
printshellcmds=snakemake_run_details.snakemake_run_conf["printshellcmds"],
)

if snakemake_run_details.snakemake_run_conf["dryrun"] is False and status is True:
Expand Down
3 changes: 2 additions & 1 deletion ViroConstrictor/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,8 @@ def pathCompleter(self, text: str, state: int) -> str:
if os.path.isdir(text):
text += "/"

return list(glob.glob(f"{text}*"))[state]
# We explicitly use a list comprehension here instead of a call to the list constructor, as the latter would otherwise break the autocompletion functionality of paths.
return [x for x in glob.glob(f"{text}*")][state]

def createListCompleter(self, ll: list[str]) -> None:
"""
Expand Down
1 change: 1 addition & 0 deletions ViroConstrictor/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ def print_jobstatistics_logmessage(msg: dict) -> None:

logmessage_strings_info: dict[str, Any] = {
"Activating conda environment": ColorizeLogMessagePath,
"Activating singularity image": ColorizeLogMessagePath,
"Building DAG of jobs": BaseLogMessage,
"Creating conda environment": ColorizeLogMessagePath,
"Removing incomplete Conda environment": ColorizeLogMessagePath,
Expand Down
50 changes: 37 additions & 13 deletions ViroConstrictor/match_ref.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import copy
import sys
from typing import Literal

import pandas as pd
Expand All @@ -9,6 +10,10 @@
from ViroConstrictor.parser import CLIparser
from ViroConstrictor.runconfigs import GetSnakemakeRunDetails, WriteYaml
from ViroConstrictor.runreport import WriteReport
from ViroConstrictor.workflow.containers import (
construct_container_bind_args,
download_containers,
)


def run_snakemake(
Expand Down Expand Up @@ -36,23 +41,29 @@ def run_snakemake(
cores=snakemakedetails.snakemake_run_conf["cores"],
use_conda=snakemakedetails.snakemake_run_conf["use-conda"],
conda_frontend="mamba",
use_singularity=snakemakedetails.snakemake_run_conf["use-singularity"],
singularity_args=construct_container_bind_args(inputs_obj.samples_dict),
jobname=snakemakedetails.snakemake_run_conf["jobname"],
latency_wait=snakemakedetails.snakemake_run_conf["latency-wait"],
dryrun=snakemakedetails.snakemake_run_conf["dryrun"],
configfiles=[
WriteYaml(
snakemakedetails.snakemake_run_parameters,
f"{inputs_obj.workdir}/config/run_params_MR.yaml",
)
),
WriteYaml(
snakemakedetails.snakemake_run_conf,
f"{inputs_obj.workdir}/config/run_configs_MR.yaml",
),
],
restart_times=3,
keepgoing=True,
restart_times=snakemakedetails.snakemake_run_conf["restart-times"],
keepgoing=snakemakedetails.snakemake_run_conf["keep-going"],
quiet=["all"], # type: ignore
log_handler=[
ViroConstrictor.logging.snakemake_logger(logfile=inputs_obj.logfile),
],
printshellcmds=False,
scheduler="greedy",
printshellcmds=snakemakedetails.snakemake_run_conf["printshellcmds"],
scheduler=snakemakedetails.snakemake_run_conf["scheduler"],
)

return snakemake(
Expand All @@ -61,6 +72,8 @@ def run_snakemake(
cores=snakemakedetails.snakemake_run_conf["cores"],
use_conda=snakemakedetails.snakemake_run_conf["use-conda"],
conda_frontend="mamba",
use_singularity=snakemakedetails.snakemake_run_conf["use-singularity"],
singularity_args=construct_container_bind_args(inputs_obj.samples_dict),
jobname=snakemakedetails.snakemake_run_conf["jobname"],
latency_wait=snakemakedetails.snakemake_run_conf["latency-wait"],
drmaa=snakemakedetails.snakemake_run_conf["drmaa"],
Expand All @@ -70,16 +83,20 @@ def run_snakemake(
WriteYaml(
snakemakedetails.snakemake_run_parameters,
f"{inputs_obj.workdir}/config/run_params_MR.yaml",
)
),
WriteYaml(
snakemakedetails.snakemake_run_conf,
f"{inputs_obj.workdir}/config/run_configs_MR.yaml",
),
],
restart_times=3,
keepgoing=True,
restart_times=snakemakedetails.snakemake_run_conf["restart-times"],
keepgoing=snakemakedetails.snakemake_run_conf["keep-going"],
quiet=["all"], # type: ignore
log_handler=[
ViroConstrictor.logging.snakemake_logger(logfile=inputs_obj.logfile),
],
printshellcmds=False,
scheduler="greedy",
printshellcmds=snakemakedetails.snakemake_run_conf["printshellcmds"],
scheduler=snakemakedetails.snakemake_run_conf["scheduler"],
)


Expand Down Expand Up @@ -156,9 +173,11 @@ def replacement_merge_dataframe_on_cols(
"""
for i in zip(cols_left, cols_right):
original_df[i[0]] = original_df.apply(
lambda x: override_df[i[1]][override_df["sample"] == x["SAMPLE"]].values[0]
if x["SAMPLE"] in override_df["sample"].values and x[i[0]] != "NONE"
else x[i[0]],
lambda x: (
override_df[i[1]][override_df["sample"] == x["SAMPLE"]].values[0]
if x["SAMPLE"] in override_df["sample"].values and x[i[0]] != "NONE"
else x[i[0]]
),
axis=1,
)
return original_df
Expand All @@ -185,6 +204,11 @@ def process_match_ref(parsed_inputs: CLIparser) -> CLIparser:
log.info(
f"{'='*20} [bold orange_red1] Starting Match-reference process [/bold orange_red1] {'='*20}"
)

if download_containers(snakemakedetails.snakemake_run_conf) != 0:
log.error("Failed to download containers required for workflow.\nPlease check the logs and your settings for more information and try again later.")
sys.exit(1)

status = run_snakemake(inputs_obj_match_ref, snakemakedetails)

workflow_state: Literal["Failed", "Success"] = (
Expand Down
Loading

0 comments on commit 7679e8e

Please sign in to comment.