Skip to content

Commit

Permalink
Merge pull request #43 from gymrek-lab/feat/multiple-testing
Browse files Browse the repository at this point in the history
feat: finish running on geuvadis
  • Loading branch information
aryarm authored Jun 5, 2024
2 parents 7876136 + 02f36c4 commit 2bf6d5f
Show file tree
Hide file tree
Showing 69 changed files with 8,294 additions and 2,085 deletions.
41 changes: 41 additions & 0 deletions .devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/ubuntu
{
"name": "Ubuntu",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/base:jammy",
"features": {
"ghcr.io/rocker-org/devcontainer-features/miniforge:1": {
"version": "latest",
"variant": "Mambaforge"
}
},

// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},

// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],

// Use 'postCreateCommand' to run commands after the container is created.
"postCreateCommand": "mamba env create -n happler -f dev-env.yml && conda run -n happler poetry install",

// Configure tool-specific properties.
"customizations": {
"vscode": {
"extensions": ["ms-python.python"],
"settings": {
"python.condaPath": "/opt/conda/condabin/conda",
"python.defaultInterpreterPath": "/opt/conda/envs/happler/bin/python",
"python.terminal.activateEnvironment": true,
"python.terminal.activateEnvInCurrentTerminal": true,
"python.venvFolders": ["/home/vscode/.cache/pypoetry/virtualenvs"],
"terminal.integrated.environmentChangesRelaunch": true,
"terminal.integrated.hideOnStartup": "always"
}
}
}

// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
}
13 changes: 13 additions & 0 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
## Checklist

* [ ] I've checked to ensure there aren't already other open [pull requests](../../../pulls) for the same update/change
* [ ] I've prefixed the title of my PR according to [the conventional commits specification](https://www.conventionalcommits.org/). If your PR fixes a bug, please prefix the PR with `fix: `. Otherwise, if it introduces a new feature, please prefix it with `feat: `. If it introduces a breaking change, please add an exclamation before the colon, like `feat!: `. If the scope of the PR changes because of a revision to it, please update the PR title, since the title will be used in our CHANGELOG.
* [ ] At the top of the PR, I've [listed any open issues that this PR will resolve](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword). For example, "resolves #0" if this PR resolves issue #0
- [ ] I've explained my changes in a manner that will make it possible for both users and maintainers of happler to understand them
* [ ] I have followed the [contributing guidelines](https://happler.readthedocs.io/en/stable/project_info/contributing.html#how-to-fix-a-bug-or-implement-a-new-feature)
* [ ] I have adhered to the [style guidelines](https://happler.readthedocs.io/en/stable/project_info/contributing.html#style)
* [ ] I've added tests for any new functionality. Or, if this PR fixes a bug, I've added test(s) that replicate it
* [ ] I've updated the relevant documentation and checked that the newly built documentation is formatted properly
* [ ] All functions, modules, classes etc. still conform to [numpy docstring standards](https://numpydoc.readthedocs.io/en/latest/format.html)
* [ ] (if applicable) I've updated the pyproject.toml file with any changes I've made to happler's dependencies, and I've run `poetry lock --no-update` to ensure the lock file stays up to date and that our dependencies are locked to their minimum versions
* [ ] In the body of this PR, I've included a short address to the reviewer highlighting one or two items that might deserve their focus
5 changes: 0 additions & 5 deletions .github/workflows/constraints.txt

This file was deleted.

126 changes: 56 additions & 70 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
name: Tests

on:
- pull_request
on: [pull_request, workflow_call]

jobs:
tests:
Expand All @@ -11,13 +10,15 @@ jobs:
fail-fast: false
matrix:
include:
- { python: "3.7", os: "ubuntu-latest", session: "lint" }
- { python: "3.7", os: "ubuntu-latest", session: "tests" }
- { python: "3.8", os: "ubuntu-latest", session: "lint" }
- { python: "3.8", os: "ubuntu-latest", session: "tests" }
- { python: "3.9", os: "ubuntu-latest", session: "tests" }
- { python: "3.10", os: "ubuntu-latest", session: "tests" }
# - { python: "3.10", os: "windows-latest", session: "tests" }
# - { python: "3.10", os: "macos-latest", session: "tests" }
- { python: "3.11", os: "ubuntu-latest", session: "tests" }
- { python: "3.12", os: "ubuntu-latest", session: "tests" }
# - { python: "3.11", os: "windows-latest", session: "tests" }
# - { python: "3.9", os: "macos-latest", session: "tests" }
- { python: "3.8", os: "ubuntu-latest", session: "size" }

env:
NOXSESSION: ${{ matrix.session }}
Expand All @@ -26,90 +27,75 @@ jobs:

steps:
- name: Check out the repository
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Set up Python ${{ matrix.python }}
uses: actions/setup-python@v3
- name: Setup Mambaforge
uses: conda-incubator/setup-miniconda@v3
with:
python-version: ${{ matrix.python }}
activate-environment: happler
miniforge-variant: Mambaforge
auto-activate-base: false
miniforge-version: latest
use-mamba: true

- name: Upgrade pip
run: |
pip install --constraint=.github/workflows/constraints.txt pip
pip --version
- name: Upgrade pip in virtual environments
shell: python
run: |
import os
import pip
with open(os.environ["GITHUB_ENV"], mode="a") as io:
print(f"VIRTUALENV_PIP={pip.__version__}", file=io)
- name: Install Poetry
- name: Get Date
id: get-date
run: echo "today=$(/bin/date -u '+%Y%m%d')" >> $GITHUB_OUTPUT
shell: bash

- name: Cache Conda env
uses: actions/cache@v3
with:
path: ${{ env.CONDA }}/envs
key:
conda-${{ runner.os }}--${{ runner.arch }}--${{ steps.get-date.outputs.today }}-${{ hashFiles('dev-env.yml') }}-${{ env.CACHE_NUMBER }}
env:
# Increase this value to reset cache if dev-env.yml has not changed
CACHE_NUMBER: 0
id: cache

- name: Install dev environment
run:
mamba env update -n happler -f dev-env.yml
if: steps.cache.outputs.cache-hit != 'true'

- name: Try to build happler
shell: bash -el {0}
run: |
pipx install --pip-args=--constraint=.github/workflows/constraints.txt poetry
poetry --version
- name: Install Nox
poetry build --no-ansi
- name: Check distribution size
if: matrix.session == 'size'
run: |
pipx install --pip-args=--constraint=.github/workflows/constraints.txt nox
pipx inject --pip-args=--constraint=.github/workflows/constraints.txt nox nox-poetry
nox --version
- name: Run Nox
du -csh dist/*
# check that the generated dist/ directory does not exceed 0.3 MB
# if this check fails, it's because you forgot to list large files in a "tool.poetry.exclude" section of our pyproject.toml
# https://python-poetry.org/docs/pyproject/#include-and-exclude
[ $(du -b dist | cut -f1) -lt 300000 ]
- name: Run tests with nox
if: matrix.session != 'size'
shell: bash -el {0}
run: |
nox --python=${{ matrix.python }}
nox --verbose --python=${{ matrix.python }}
- name: Upload coverage data
if: always() && matrix.session == 'tests'
uses: "actions/upload-artifact@v3"
with:
name: coverage-data
path: ".coverage.*"

large-files:
name: File sizes
runs-on: ubuntu-latest
steps:
- name: Check out the repository
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Check for large files
uses: actionsdesk/lfs-warning@v3.2
with:
token: ${{ secrets.GITHUB_TOKEN }} # Optional
filesizelimit: 500000b
labelName: large-files

# coverage:
# runs-on: ubuntu-latest
# needs: tests
# steps:
# - name: Check out the repository
# uses: actions/checkout@v3

# - name: Set up Python
# uses: actions/setup-python@v3
# with:
# python-version: "3.10"

# - name: Upgrade pip
# run: |
# pip install --constraint=.github/workflows/constraints.txt pip
# pip --version
# - name: Install Poetry
# run: |
# pipx install --pip-args=--constraint=.github/workflows/constraints.txt poetry
# poetry --version
# - name: Install Nox
# run: |
# pipx install --pip-args=--constraint=.github/workflows/constraints.txt nox
# pipx inject --pip-args=--constraint=.github/workflows/constraints.txt nox nox-poetry
# nox --version
# - name: Download coverage data
# uses: actions/download-artifact@v3
# with:
# name: coverage-data

# - name: Combine coverage data and display human readable report
# run: |
# nox --session=coverage
# - name: Create coverage report
# run: |
# nox --session=coverage -- xml
# - name: Upload coverage report
# uses: codecov/codecov-action@v3.1.0
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ dist/
analysis/data
analysis/out
analysis/log
analysis/Rplots.pdf
analysis/myenv.RData
.snakemake
.ipynb_checkpoints
# vscode
.vscode
venv/
28 changes: 18 additions & 10 deletions .readthedocs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,26 @@
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Note: I used https://github.com/readthedocs/readthedocs.org/issues/4912#issuecomment-664002569 for inspiration
# Note: I used https://docs.readthedocs.io/en/stable/build-customization.html#install-dependencies-with-poetry for inspiration

version: 2

build:
os: "ubuntu-22.04"
tools:
python: "3.8"
jobs:
post_create_environment:
# Install poetry
# https://python-poetry.org/docs/#installing-manually
- pip install poetry
post_install:
# Install dependencies with 'docs' dependency group
# https://python-poetry.org/docs/managing-dependencies/#dependency-groups
# VIRTUAL_ENV needs to be set manually for now.
# See https://github.com/readthedocs/readthedocs.org/pull/11152/
- VIRTUAL_ENV=$READTHEDOCS_VIRTUALENV_PATH poetry install --only main,docs

sphinx:
configuration: docs/conf.py

python:
version: 3.7
install:
- method: pip
path: .
extra_requirements:
- docs

fail_on_warning: true
10 changes: 5 additions & 5 deletions analysis/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[![Snakemake](https://img.shields.io/badge/snakemake-≥6.7.0-brightgreen.svg?style=flat-square)](https://snakemake.bitbucket.io)
[![Snakemake](https://img.shields.io/badge/snakemake-≥8.12.0-brightgreen.svg?style=flat-square)](https://snakemake.bitbucket.io)

# download
Execute the following command.
Expand All @@ -8,9 +8,9 @@ git clone https://github.com/aryarm/happler
You can also download example data for the pipeline. See [the config file](config/config.yml) for links and instructions.

# setup
The pipeline is written as a Snakefile which can be executed via [Snakemake](https://snakemake.readthedocs.io). For reproducibility, we recommend installing the version that we used (6.7.0):
The pipeline is written as a Snakefile which can be executed via [Snakemake](https://snakemake.readthedocs.io). For reproducibility, we recommend installing the version that we used (8.12.0):
```
conda create -n snakemake -c conda-forge --no-channel-priority 'bioconda::snakemake==6.7.0'
conda create -n snakemake -c conda-forge --no-channel-priority 'bioconda::snakemake==8.12.0'
```
`snakemake` will [automatically install all dependencies](https://snakemake.readthedocs.io/en/stable/snakefiles/deployment.html#integrated-package-management) of the pipeline upon its first execution using `conda`.

Expand All @@ -25,9 +25,9 @@ conda create -n snakemake -c conda-forge --no-channel-priority 'bioconda::snakem
```
./run.bash &
```
__or__ on a TORQUE cluster:
__or__ on a SLURM cluster:
```
qsub run.bash
sbatch run.bash
```
### Output
All output of the pipeline will be placed in a new directory (`out/`, by default).
Expand Down
87 changes: 87 additions & 0 deletions analysis/config/config-geuvadis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# This is the Snakemake configuration file that specifies paths
# and options for the pipeline. Anybody wishing to use
# the provided snakemake pipeline should first fill out this file with paths to
# their own data, as the Snakefile requires it.
# Every config option has reasonable defaults unless it is labeled as "required."
# All paths are relative to the directory that Snakemake is executed in.
# Note: this file is written in the YAML syntax (https://learnxinyminutes.com/docs/yaml/)


# Paths to a SNP-STR haplotype reference panel
# You can download this from http://gymreklab.com/2018/03/05/snpstr_imputation.html
# If the VCFs are per-chromosome, replace the contig name in the file name with "{chr}"
# The VCF(s) must be sorted and indexed (with a .tbi file in the same directory)
# required!
# ref_panel: "/projects/ps-gymreklab/resources/datasets/snpstr/1kg.snp.str.chr{chr}.vcf.gz"
# snp_panel: "/projects/ps-gymreklab/resources/datasets/ukbiobank/array_imputed/pfile_converted/chr{chr}.pgen"
snp_panel: "data/geuvadis/geuvadis_ensemble_phasing.pgen"
# str_panel: "/tscc/projects/ps-gymreklab/jmargoli/ukbiobank/str_imputed/runs/first_pass/vcfs/annotated_strs/chr{chr}.vcf.gz"

# Path to a list of samples to exclude from the analysis
# There should be one sample ID per line
# exclude_samples: data/ukb_random_samples_exclude.tsv

# If SNPs are unphased, provide the path to a SHAPEIT4 map file like these:
# https://github.com/odelaneau/shapeit4/tree/master/maps
# The map file should use the same reference genome as the reference panel VCFs
# phase_map: data/genetic_maps/chr{chr}.b37.gmap.gz

# A "locus" is either a string with a contig name, a colon, the start position, a dash,
# and the end position, or the path to a BED file with a ".bed" file ending
# There are different simulation modes that you can use:
# 1. "str" - a tandem repeat is a string with a contig name, a colon, and the start position
# 2. "snp" - a SNP follows the same format as "str"
# 3. "hap" - a haplotype
# 4. "ld_range" - creates random two-SNP haplotypes with a range of LD values between the alleles of each haplotype
# 5. "run" - execute happler on a locus without simulating anything
# The STR and SNP positions should be contained within the locus.
# The positions should be provided in the same coordinate system as the reference
# genome of the reference panel VCFs
# The contig should correspond with the contig name from the {chr} wildcard in the VCF
# required! and unless otherwise noted, all attributes of each mode are required
# locus: 19:45401409-46401409 # center: 45901409 (APOe4)
locus: data/geuvadis/geuvadis_eqtl_genes.full.liftover.bed
modes:
str:
pos: 19:45903857 # STR_691361
snp:
pos: 19:45910672 # rs1046282
hap:
alleles: [rs36046716:G, rs1046282:G] # 45892145, 45910672
beta: [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45]
ld_range:
reps: 1
min_ld: 0
max_ld: 1
step: 0.1
min_af: 0.25
max_af: 0.75
# beta: [0.35]
beta: [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45]
alpha: [0.05]
random: false # whether to also produce random haplotypes
run:
pheno: data/geuvadis/phenos/{trait}.pheno
SVs: data/geuvadis/pangenie_hprc_hg38_all-samples_bi_SVs-missing_removed.pgen
# pheno_matrix: data/geuvadis/EUR_converted_expr_hg38.csv # optional
mode: run

# Covariates to use if they're needed
# Otherwise, they're assumed to be regressed out of the phenotypes
# Note: the finemapping methods won't be able to handle these covariates
# covar: data/geuvadis/5PCs_sex.covar

# Discard rare variants with a MAF below this number
# Defaults to 0 if not specified
min_maf: 0.1

# Sample sizes to use
# sample_size: [500, 1000, 1500, 2000, 2500]
# sample_size: 777

# Whether to exclude the causal variant from the set of genotypes provided to the
# finemapping methods. Set this to true if you're interested in seeing how the
# methods perform when the causal variant is absent from the data.
# Defaults to false if not specified
# You can also provide a list of booleans, if you want to test both values
exclude_causal: [true, false]
Loading

0 comments on commit 2bf6d5f

Please sign in to comment.