Skip to content

Commit

Permalink
Install tabix via apt-get, and use the notebook python for install
Browse files Browse the repository at this point in the history
  • Loading branch information
hyanwong committed Jul 26, 2024
1 parent f1d1cb7 commit 1342e5d
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 45 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ jobs:
- name: Build Docs
run: |
. venv/bin/activate
eval "$(conda shell.bash hook)"
conda activate
cd docs && make dist
- name: Trigger docs site rebuild
Expand Down
80 changes: 39 additions & 41 deletions docs/simulation-example.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,42 @@
import os
import builtins
import subprocess
import sys

import msprime
import tqdm

sys.path.insert(0, os.path.abspath(".."))
import tsinfer # noqa

if True:
ts = msprime.simulate(
sample_size=10000,
Ne=10**4,
recombination_rate=1e-8,
mutation_rate=1e-8,
length=10 * 10**6,
random_seed=42,
)
ts.dump("simulation-source.trees")
print("Simulation done:", ts.num_trees, "trees and", ts.num_sites)

with tsinfer.SampleData(
sequence_length=ts.sequence_length,
path="simulation.samples",
num_flush_threads=2,
) as samples:
for var in tqdm.tqdm(ts.variants(), total=ts.num_sites):
samples.add_site(var.site.position, var.genotypes, var.alleles)

else:
source = msprime.load("simulation-source.trees")
inferred = msprime.load("simulation.trees")

subset = range(0, 6)
source_subset = source.simplify(subset)
inferred_subset = inferred.simplify(subset)

tree = source_subset.first()
print("True tree: interval=", tree.interval)
print(tree.draw(format="unicode"))

tree = inferred_subset.first()
print("Inferred tree: interval=", tree.interval)
print(tree.draw(format="unicode"))
import numpy as np
from Bio import bgzf

if getattr(builtins, "__IPYTHON__", False): # if running IPython: e.g. in a notebook
num_diploids, seq_len = 100, 10_000
name = "notebook-simulation"
else: # Take parameters from the command-line
num_diploids, seq_len = int(sys.argv[1]), float(sys.argv[2])
name = "cli-simulation"

ts = msprime.sim_ancestry(
num_diploids,
population_size=10**4,
recombination_rate=1e-8,
sequence_length=seq_len,
random_seed=6,
)
ts = msprime.sim_mutations(ts, rate=1e-8, random_seed=7)
ts.dump(name + "-source.trees")
print(
f"Simulated {ts.num_samples} samples over {seq_len/1e6} Mb:",
f"{ts.num_trees} trees and {ts.num_sites} sites",
)

# Convert to a zarr file: this should be easier once a tskit2zarr utility is made, see
# https://github.com/sgkit-dev/bio2zarr/issues/232
np.save(f"{name}-AA.npy", [s.ancestral_state for s in ts.sites()])
vcf_name = f"{name}.vcf.gz"
with bgzf.open(vcf_name, "wt") as f:
ts.write_vcf(f)
subprocess.run(["tabix", vcf_name])
ret = subprocess.run(
"python -m bio2zarr vcf2zarr convert --force".split() + [vcf_name, name + ".vcz"],
stderr=subprocess.DEVNULL if name == "notebook-simulation" else None,
)
if ret.returncode == 0:
print(f"Converted to {name}.vcz")
14 changes: 10 additions & 4 deletions docs/tutorial.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,10 @@ Python to simulate some data under the coalescent with recombination, using
import builtins
import sys
from Bio import bgzf
import os
import subprocess
from Bio import bgzf
import numpy as np
import msprime
Expand All @@ -213,10 +215,12 @@ import tsinfer
if getattr(builtins, "__IPYTHON__", False): # if running IPython: e.g. in a notebook
num_diploids, seq_len = 100, 10_000
name = "notebook-simulation"
python = sys.executable
else: # Take parameters from the command-line
num_diploids, seq_len = int(sys.argv[1]), float(sys.argv[2])
name = "cli-simulation"
python = "python"
ts = msprime.sim_ancestry(
num_diploids,
population_size=10**4,
Expand All @@ -239,9 +243,11 @@ with bgzf.open(vcf_name, "wt") as f:
ts.write_vcf(f)
subprocess.run(["tabix", vcf_name])
ret = subprocess.run(
"python -m bio2zarr vcf2zarr convert --force".split() + [vcf_name, name+".vcz"],
stderr = subprocess.DEVNULL if name == "notebook-simulation" else None
[python, "-m", "bio2zarr", "vcf2zarr", "convert", "--force", vcf_name, f"{name}.vcz"],
stderr = subprocess.DEVNULL if name == "notebook-simulation" else None,
)
assert os.path.exists(f"{name}.vcz")
if ret.returncode == 0:
print(f"Converted to {name}.vcz")
```
Expand Down

0 comments on commit 1342e5d

Please sign in to comment.