# .github/workflows/run-pipeline.yml

name: Run snakemake pipeline

# Reusable workflow: other workflows invoke it through `workflow_call` and
# steer the single job below with the inputs declared here.
on:
  workflow_call:
    inputs:
      # Environment spec handed to setup-micromamba as `environment-file`.
      environment_file:
        type: string
        required: true
      # Git ref to check out; defaults to `github.ref_name`.
      branch:
        type: string
        required: false
        default: ${{ github.ref_name }}
      # Optional command executed (login bash) before the pipeline runs.
      prerun_cmd:
        type: string
        required: false
      # Snakefile passed to snakemake via `--snakefile`.
      pipeline_file:
        type: string
        required: true
      # Extra arguments appended verbatim to the snakemake command line.
      pipeline_extra_flags:
        type: string
        required: false
      # NOTE(review): no step or env entry in this workflow reads this input;
      # CUDA_VISIBLE_DEVICES is set unconditionally at job level.
      no_gpu:
        type: boolean
        required: false
        default: true
      # Working directory passed to snakemake via `--directory`.
      pipeline_directory:
        type: string
        required: false
        default: ./example
      # Snakemake config file. It is prefixed with `--configfile` only when the
      # value ends in 'ml'; any other non-empty value lands on the command line
      # as-is.
      pipeline_config:
        type: string
        required: false
      # When true, snakemake is invoked with `-n`.
      dry_run:
        type: boolean
        required: false
        default: true
      # Enables the fasta cache step and, on a cache miss, the download step.
      download_fasta_data:
        type: boolean
        required: false
        default: false
      # Directory that is cached and that receives the downloaded fasta file.
      fasta_download_path:
        type: string
        required: false
      # Optional command executed (micromamba shell) after the pipeline runs.
      postrun_cmd:
        type: string
        required: false
      # Upload switches: publish pipeline outputs found under ./example as
      # artifacts named completed_<kind>_outputs.
      upload_training_outputs:
        type: boolean
        required: false
        default: false
      upload_pretrained_outputs:
        type: boolean
        required: false
        default: false
      upload_regenie_outputs:
        type: boolean
        required: false
        default: false
      # Download switches: fetch the completed_<kind>_outputs artifact into
      # ./tests/completed_<kind>_outputs.
      download_training_outputs:
        type: boolean
        required: false
        default: false
      download_pretrained_outputs:
        type: boolean
        required: false
        default: false
      download_regenie_outputs:
        type: boolean
        required: false
        default: false
      # Check switches: each one runs a tests/deeprvat/compare_reference.py
      # sub-command against the downloaded reference outputs.
      run_training_results_check:
        type: boolean
        required: false
        default: false
      run_burden_results_check:
        type: boolean
        required: false
        default: false
      run_association_results_check:
        type: boolean
        required: false
        default: false
      run_regenie_association_results_check:
        type: boolean
        required: false
        default: false

jobs:
  # Single job: set up the environment, run the snakemake pipeline, then
  # optionally exchange artifacts and compare results against references.
  Run-Pipeline:
      runs-on: ubuntu-latest
      env:
        # Set for every step regardless of inputs; presumably forces CPU-only
        # execution. NOTE(review): the `no_gpu` input is not consulted here.
        CUDA_VISIBLE_DEVICES: "-1"
      steps:
        # Fetch the sources at the ref requested by the caller.
        - name: Check out repository code
          uses: actions/checkout@v4
          with:
            ref: ${{ inputs.branch }}
        # Build the micromamba environment from the caller-supplied file,
        # with caching of both the environment and package downloads enabled.
        - uses: mamba-org/setup-micromamba@v1.8.1
          with:
            cache-downloads: true
            cache-environment: true
            environment-file: ${{ inputs.environment_file }}
        # Editable install of the checked-out repository, run through the
        # `micromamba-shell` wrapper (presumably provided by the
        # setup-micromamba step above).
        - name: Install DeepRVAT
          shell: micromamba-shell {0}
          run: pip install -e ${{ github.workspace }}
        # Restore/save the reference fasta directory. The cache key is derived
        # from the download path only, so it does not change between runs.
        - name: Cache Fasta file
          id: cache-fasta
          if: inputs.download_fasta_data
          uses: actions/cache@v4
          with:
            key: cache-reference-fasta-${{ inputs.fasta_download_path }}
            path: ${{ inputs.fasta_download_path }}
        # Runs only when fasta data was requested and the cache step above did
        # not report a hit. Downloads the GENCODE release 44 GRCh38 primary
        # assembly and decompresses it in place.
        - name: Download and unpack fasta data
          if: inputs.download_fasta_data && steps.cache-fasta.outputs.cache-hit != 'true'
          run: |
            # `wget -O` does not create missing parent directories; on a cache
            # miss the target directory may not exist yet.
            mkdir -p ${{ inputs.fasta_download_path }}
            wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_44/GRCh38.primary_assembly.genome.fa.gz \
            -O ${{ inputs.fasta_download_path }}/GRCh38.primary_assembly.genome.fa.gz \
            && gzip -d ${{ inputs.fasta_download_path }}/GRCh38.primary_assembly.genome.fa.gz
        # Optional caller-supplied setup command; skipped when the input is
        # empty. Unlike the other run steps, this one uses a login bash shell
        # instead of `micromamba-shell`.
        - name: Run pre pipeline cmd
          if: inputs.prerun_cmd
          shell: bash -el {0}
          run: ${{ inputs.prerun_cmd }}
        # Invoke snakemake inside the micromamba environment.
        #   -n                  only when `dry_run` is true
        #   -j 2                job/core limit of 2
        #   --directory         working directory from `pipeline_directory`
        #   --configfile        only when `pipeline_config` ends in 'ml'
        #   --snakefile         workflow definition from `pipeline_file`
        #   -F / -p             force all rules / print shell commands
        #   --show-failed-logs  print logs of failed jobs
        # `pipeline_extra_flags` is appended verbatim at the end.
        #
        # The step name previously interpolated `github.jobs[github.job].name`;
        # the `github` context has no `jobs` property, so that suffix
        # presumably always rendered empty. The snakefile path is shown instead.
        - name: "Running pipeline ${{ inputs.pipeline_file }}"
          run: |
            python -m snakemake ${{ (inputs.dry_run && '-n') || '' }} \
            -j 2 --directory ${{inputs.pipeline_directory}} \
            ${{ (endsWith(inputs.pipeline_config, 'ml') && '--configfile')  || '' }}  ${{ inputs.pipeline_config }} \
            --snakefile ${{inputs.pipeline_file}} --show-failed-logs -F -p ${{ inputs.pipeline_extra_flags }}
          shell: micromamba-shell {0}
        # Optional caller-supplied command executed after the pipeline, inside
        # the micromamba environment; skipped when the input is empty.
        - name: Run post pipeline cmd
          if: inputs.postrun_cmd
          shell: micromamba-shell {0}
          run: ${{ inputs.postrun_cmd }}
        # Publish training outputs found anywhere under ./example as a
        # short-lived artifact.
        - name: Upload Training Outputs
          if: inputs.upload_training_outputs
          id: uploaded_training_outputs
          uses: actions/upload-artifact@v4
          with:
            name: completed_training_outputs
            retention-days: 1
            # Required for the .zarr directories.
            include-hidden-files: true
            path: |
              ./example/**/seed_genes.parquet
              ./example/**/covariates.zarr/
              ./example/**/y.zarr/
              ./example/**/input_tensor.zarr/
              ./example/**/models/
        # Publish the outputs of a pretrained-model run found anywhere under
        # ./example as a short-lived artifact.
        - name: Upload Pretrained Outputs
          if: inputs.upload_pretrained_outputs
          id: uploaded_pretrained_outputs
          uses: actions/upload-artifact@v4
          with:
            name: completed_pretrained_outputs
            retention-days: 1
            # Required for the .zarr directories.
            include-hidden-files: true
            path: |
              ./example/**/sample_ids.zarr/
              ./example/**/y.zarr/
              ./example/**/x.zarr/
              ./example/**/burdens.zarr/
              ./example/**/genes.npy
              ./example/**/all_results.parquet
        # Publish the association result tables found anywhere under ./example
        # as a short-lived artifact.
        - name: Upload Regenie Outputs
          if: inputs.upload_regenie_outputs
          id: uploaded_regenie_outputs
          uses: actions/upload-artifact@v4
          with:
            name: completed_regenie_outputs
            retention-days: 1
            path: |
              ./example/**/all_results.parquet
        # Fetch the `completed_training_outputs` artifact so it can serve as
        # the reference for the training results check.
        - name: Download Previous Training Outputs
          if: inputs.download_training_outputs
          id: downloaded_training_outputs
          uses: actions/download-artifact@v4
          with:
            path: ./tests/completed_training_outputs
            name: completed_training_outputs
        # Fetch the `completed_pretrained_outputs` artifact so it can serve as
        # the reference for the burden and association checks.
        - name: Download Previous Pretrained Outputs
          if: inputs.download_pretrained_outputs
          id: downloaded_pretrained_outputs
          uses: actions/download-artifact@v4
          with:
            path: ./tests/completed_pretrained_outputs
            name: completed_pretrained_outputs
        # Fetch the `completed_regenie_outputs` artifact so it can serve as
        # the reference for the REGENIE association check.
        - name: Download Previous Regenie Outputs
          if: inputs.download_regenie_outputs
          id: downloaded_regenie_outputs
          uses: actions/download-artifact@v4
          with:
            path: ./tests/completed_regenie_outputs
            name: completed_regenie_outputs
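        # Disabled debugging aid: lists the files of a downloaded artifact.
        # NOTE: `inputs.download_outputs` and ./tests/completed_run_output do not
        # match any input or download path defined in this workflow; point them
        # at one of the download_*_outputs inputs and its directory before
        # re-enabling.
        # - name: Display structure of downloaded files
        #   if: inputs.download_outputs
        #   run: ls -R ./tests/completed_run_output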
        # Compare the fresh training outputs in ./example with the downloaded
        # reference artifact for the two listed phenotypes.
        - name: Run Training Results Check
          if: inputs.run_training_results_check
          shell: micromamba-shell {0}
          run: |
            python $GITHUB_WORKSPACE/tests/deeprvat/compare_reference.py compare-training \
            ./example/ ./tests/completed_training_outputs/ \
            "Cholesterol" "Platelet_count"
        # Compare the burden outputs in ./example with the downloaded
        # pretrained reference artifact for the two listed phenotypes.
        - name: Run Burden Score Results Check
          if: inputs.run_burden_results_check
          shell: micromamba-shell {0}
          run: |
            python $GITHUB_WORKSPACE/tests/deeprvat/compare_reference.py compare-burdens \
            ./example/ ./tests/completed_pretrained_outputs/ \
            "Cholesterol" "Platelet_count"
        # Compare the association results in ./example with the downloaded
        # pretrained reference artifact for the two listed phenotypes.
        - name: Run Association Results Check
          if: inputs.run_association_results_check
          shell: micromamba-shell {0}
          run: |
            python $GITHUB_WORKSPACE/tests/deeprvat/compare_reference.py compare-association \
            ./example/ ./tests/completed_pretrained_outputs/ \
            "Cholesterol" "Platelet_count"
        # Same `compare-association` check as for the pretrained run, but
        # against the downloaded REGENIE reference artifact.
        - name: Run REGENIE Association Results Check
          if: inputs.run_regenie_association_results_check
          shell: micromamba-shell {0}
          run: |
            python $GITHUB_WORKSPACE/tests/deeprvat/compare_reference.py compare-association \
            ./example/ ./tests/completed_regenie_outputs/ \
            "Cholesterol" "Platelet_count"