From df2ccf855ef502136cdb1f74f105245d9b644ef2 Mon Sep 17 00:00:00 2001
From: James Hadfield
Date: Thu, 23 Sep 2021 16:13:03 +1200
Subject: [PATCH] Add preprocess GitHub actions

These actions are intended to be triggered via repository dispatch
events sent from the appropriate fetch-and-ingest actions in
nextstrain/ncov-ingest.

Testing reveals that the GISAID preprocessing takes around 6 hours (and
this cannot be sped up by throwing more cores at the problem) so we
detach immediately after submitting the AWS job so that we don't run
into the GitHub action's 6 hour job limit.

We may wish to have this preprocessing job (snakemake profile) trigger
an appropriate phylogenetic rebuild in the future, but this has not been
implemented here.

These actions can also be triggered through the GitHub UI, with the user
choosing whether to upload to a trial location or overwrite the existing
files.
---
 .github/workflows/preprocess-gisaid.yml | 67 +++++++++++++++++++++++++
 .github/workflows/preprocess-open.yml   | 67 +++++++++++++++++++++++++
 2 files changed, 134 insertions(+)
 create mode 100644 .github/workflows/preprocess-gisaid.yml
 create mode 100644 .github/workflows/preprocess-open.yml

diff --git a/.github/workflows/preprocess-gisaid.yml b/.github/workflows/preprocess-gisaid.yml
new file mode 100644
index 000000000..b8b99af5a
--- /dev/null
+++ b/.github/workflows/preprocess-gisaid.yml
@@ -0,0 +1,67 @@
+name: preprocess-gisaid
+
+on:
+  ## This workflow is intended to be run via a repository dispatch event sent from
+  ## https://github.com/nextstrain/ncov-ingest/blob/master/.github/workflows/fetch-and-ingest-gisaid-master.yml
+  repository_dispatch:
+    types:
+      - preprocess-gisaid
+  ## Manual running (via GitHub UI) is also available, if needed, via workflow dispatch
+  workflow_dispatch:
+    inputs:
+      trial_name:
+        description: 'Short name to use as a prefix for the uploaded data. WARNING: without this we will overwrite the files in s3://nextstrain-ncov-private.'
+        required: false
+
+env:
+  ## Upload destination: a trial/<trial_name> prefix inside the private bucket when a
+  ## trial name was supplied via workflow dispatch, otherwise the bucket root itself.
+  S3_DST_BUCKET: ${{ github.event.inputs.trial_name && format('nextstrain-ncov-private/trial/{0}', github.event.inputs.trial_name) || 'nextstrain-ncov-private' }}
+
+
+jobs:
+  gisaid:
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Setup
+        run: ./scripts/setup-github-workflow
+
+      - name: Launch preprocess build
+        run: |
+          ## pipefail: without it this step's exit status is that of tee, so a failed
+          ## job submission would still be reported as a success.
+          set -euxo pipefail
+          ## --detach returns once the AWS Batch job has been submitted, keeping the
+          ## long-running build clear of the GitHub Actions job time limit.
+          nextstrain build \
+            --aws-batch \
+            --cpus 16 \
+            --detach \
+            --memory 14GiB \
+            . \
+              upload \
+              --set-threads align=16 \
+              --profile nextstrain_profiles/nextstrain-gisaid-preprocess \
+              --config "S3_DST_BUCKET=${S3_DST_BUCKET}" \
+          |& tee build-launch.log
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+
+      - name: Build info
+        run: |
+          echo "--> Preprocessed files will be uploaded as:"
+          echo " s3://${S3_DST_BUCKET}/aligned.fasta.xz"
+          echo " s3://${S3_DST_BUCKET}/masked.fasta.xz"
+          echo " s3://${S3_DST_BUCKET}/filtered.fasta.xz"
+          echo " s3://${S3_DST_BUCKET}/mutation-summary.tsv.xz"
+          echo
+          echo "--> Attach command"
+          ## The last line of the launch log is expected to be the attach command; the job ID is parsed from it below.
+          tail -n1 build-launch.log
+          echo
+          JOBID=$( tail -n1 build-launch.log | sed -E 's/.+attach ([-a-f0-9]+).+/\1/' )
+          echo "--> View this job in the AWS console via"
+          echo " https://console.aws.amazon.com/batch/home?region=us-east-1#jobs/detail/${JOBID}"
diff --git a/.github/workflows/preprocess-open.yml b/.github/workflows/preprocess-open.yml
new file mode 100644
index 000000000..af0cc6b17
--- /dev/null
+++ b/.github/workflows/preprocess-open.yml
@@ -0,0 +1,67 @@
+name: preprocess-open
+
+on:
+  ## This workflow is intended to be run via a repository dispatch event sent from
+  ## https://github.com/nextstrain/ncov-ingest/blob/master/.github/workflows/fetch-and-ingest-genbank-master.yml
+  repository_dispatch:
+    types:
+      - preprocess-open
+      - preprocess-genbank
+  ## Manual running (via GitHub UI) is also available, if needed, via workflow dispatch.
+  workflow_dispatch:
+    inputs:
+      trial_name:
+        description: 'Short name to use as a prefix for the uploaded data. WARNING: without this we will overwrite the files in s3://nextstrain-data/files/ncov/open.'
+        required: false
+
+env:
+  ## Upload destination: a trial/<trial_name> prefix in the staging bucket when a trial
+  ## name was supplied via workflow dispatch, otherwise nextstrain-data/files/ncov/open.
+  S3_DST_BUCKET: ${{ github.event.inputs.trial_name && format('nextstrain-staging/files/ncov/open/trial/{0}', github.event.inputs.trial_name) || 'nextstrain-data/files/ncov/open' }}
+
+jobs:
+  open:
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Setup
+        run: ./scripts/setup-github-workflow
+
+      - name: Launch preprocess build
+        run: |
+          ## pipefail: without it this step's exit status is that of tee, so a failed
+          ## job submission would still be reported as a success.
+          set -euxo pipefail
+          ## --detach returns once the AWS Batch job has been submitted, keeping the
+          ## long-running build clear of the GitHub Actions job time limit.
+          nextstrain build \
+            --aws-batch \
+            --cpus 16 \
+            --detach \
+            --memory 14GiB \
+            . \
+              upload \
+              --set-threads align=16 \
+              --profile nextstrain_profiles/nextstrain-open-preprocess \
+              --config "S3_DST_BUCKET=${S3_DST_BUCKET}" \
+          |& tee build-launch.log
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+
+      - name: Build info
+        run: |
+          echo "--> Preprocessed files will be uploaded as:"
+          echo " s3://${S3_DST_BUCKET}/aligned.fasta.xz"
+          echo " s3://${S3_DST_BUCKET}/masked.fasta.xz"
+          echo " s3://${S3_DST_BUCKET}/filtered.fasta.xz"
+          echo " s3://${S3_DST_BUCKET}/mutation-summary.tsv.xz"
+          echo
+          echo "--> Attach command"
+          ## The last line of the launch log is expected to be the attach command; the job ID is parsed from it below.
+          tail -n1 build-launch.log
+          echo
+          JOBID=$( tail -n1 build-launch.log | sed -E 's/.+attach ([-a-f0-9]+).+/\1/' )
+          echo "--> View this job in the AWS console via"
+          echo " https://console.aws.amazon.com/batch/home?region=us-east-1#jobs/detail/${JOBID}"