Skip to content

Commit

Permalink
Merge pull request #21 from icbi-lab/copykat
Browse files Browse the repository at this point in the history
copyKAT function
  • Loading branch information
grst committed Sep 13, 2021
2 parents e8960f9 + bbac98e commit b86c812
Show file tree
Hide file tree
Showing 9 changed files with 220 additions and 9 deletions.
35 changes: 30 additions & 5 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,37 +8,62 @@ on:

jobs:
test:
runs-on: ${{ matrix.os }}
runs-on: ${{ matrix.os.os }}
name: ${{ matrix.os.os }} (R=${{ matrix.R }}, Python=${{ matrix.python-version }})
strategy:
fail-fast: false
matrix:
python-version: [3.8]
os: [ubuntu-latest, macos-latest, windows-latest]
R: ['release']
os:
- {os: ubuntu-latest, rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: macos-latest}
- {os: windows-latest}

env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
RSPM: ${{ matrix.os.rspm }}

steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0 # required for setuptools-scm

- uses: actions/cache@v1
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }}
key: ${{ runner.os.os }}-pip-${{ hashFiles('pyproject.toml') }}
restore-keys: |
${{ runner.os }}-pip-
${{ runner.os.os }}-pip-
- name: Install macOS system dependencies
  # `matrix.os` is an object ({os: ..., rspm: ...}) in this workflow, so the
  # runner label has to be read from its `os` field; comparing the object
  # itself against a string would never match and the step would be skipped.
  if: matrix.os.os == 'macos-latest'
  run: |
    brew install cairo pkg-config autoconf automake libtool
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python-version }}

- name: Setup R
uses: r-lib/actions/setup-r@v1
with:
r-version: ${{ matrix.R }}

- name: Install dependencies
run: |
pip install .[test]
pip install .[test,copykat]
- name: Install R dependencies
run: |
Rscript -e "install.packages('remotes')" -e "remotes::install_github('navinlabcode/copykat')"
- name: Check black formatting
run: |
black --check .
- name: Test with pytest
run: |
pytest
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
test_copykat*
*.code-workspace
.vscode/*
!.vscode/settings.json.default
Expand Down
19 changes: 16 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ but plays nicely with `scanpy <https://scanpy.readthedocs.io/en/stable/index.htm
.. image:: img/infercnv_heatmap.png
:align: center
:alt: The main result of infercnv


**WARNING**:

**This package is still experimental. The results have not been validated,
except in that they look similar, but not identical, to the results of InferCNV.**

Expand Down Expand Up @@ -84,6 +84,19 @@ There are several alternative options to install infercnvpy:
.. where `tag` is one of `these tags <https://quay.io/repository/biocontainers/infercnvpy?tab=tags>`_.
To (optionally) run the :code:`copyKAT` algorithm, you need a working R installation
and the `copykat <https://github.com/navinlabcode/copykat#step-1-installation>`_ package
installed. Usually, if :code:`R` is in your :code:`PATH`, `rpy2 <https://rpy2.github.io/>`_ automatically
detects your R installation. If you get an error message while importing :code:`infercnvpy`,
try setting the :code:`R_HOME` environment variable before importing infercnvpy:

.. code-block:: python

    import os
    os.environ["R_HOME"] = "/usr/lib/R"
    import infercnvpy

Release notes
^^^^^^^^^^^^^
See the `release section <https://github.com/icbi-lab/infercnvpy/releases>`_.
Expand Down
1 change: 1 addition & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ InferCNV
:toctree: ./generated

infercnv
copykat
cnv_score

Embeddings
Expand Down
12 changes: 12 additions & 0 deletions docs/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,15 @@ @article{Tirosh2016
month = {11}
}

@article{Gao2021,
doi = {10.1038/s41587-020-00795-2},
url = {https://doi.org/10.1038/s41587-020-00795-2},
year = {2021},
month = jan,
publisher = {Nature},
volume = {39},
pages = {599--608},
author = {Gao R. and Bai S. and Henderson YC and Lin Y. and Schalck A. and Yan Y. and Kumar T. and Hu M. and Sei E. and Davis A. and Wang F. and Shaitelman SF and Wang JR and Chen K. and Moulder S. and Lai SY and Navin NE},
title = {Delineating copy number and clonal substructure in human tumors from single-cell transcriptomes},
journal = {Nature Biotechnology}
}
6 changes: 6 additions & 0 deletions infercnvpy/tests/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from infercnvpy.tl._infercnv import _get_reference
import pytest
import numpy as np
import scanpy as sc
import numpy.testing as npt


Expand Down Expand Up @@ -50,6 +51,11 @@ def test_infercnv(adata_oligodendroma, reference_key, reference_cat):
)


def test_copykat(adata_oligodendroma):
    """Run copyKAT on a 50-cell subsample and check that results are stored."""
    # Subsample to 50 cells before calling copyKAT (presumably to keep the
    # runtime of the test low).
    sc.pp.subsample(adata_oligodendroma, n_obs=50)
    cnv.tl.copykat(adata_oligodendroma)

    # With the defaults (inplace=True, key_added="cnv") the results are
    # written back into the AnnData object under these keys.
    assert "chr_pos" in adata_oligodendroma.uns["cnv"]
    assert "X_cnv" in adata_oligodendroma.obsm


def test_workflow(adata_oligodendroma):
cnv.tl.infercnv(adata_oligodendroma)
cnv.tl.pca(adata_oligodendroma)
Expand Down
1 change: 1 addition & 0 deletions infercnvpy/tl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Union
from ._infercnv import infercnv, cnv_score
from ._copykat import copykat
import numpy as np
from anndata import AnnData
import scanpy as sc
Expand Down
148 changes: 148 additions & 0 deletions infercnvpy/tl/_copykat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
from typing import Optional
import pandas as pd
from scipy.sparse import issparse
from anndata import AnnData
from scanpy import logging
import os
from multiprocessing import cpu_count


def copykat(
    adata: AnnData,
    gene_ids: str = "S",
    segmentation_cut: float = 0.1,
    distance: str = "euclidean",
    s_name: str = "copykat_result",
    min_genes_chr: int = 5,
    key_added: str = "cnv",
    inplace: bool = True,
    layer: Optional[str] = None,
    n_jobs: Optional[int] = None,
) -> pd.DataFrame:
    """Inference of genomic copy number and subclonal structure.

    Runs CopyKAT (Copynumber Karyotyping of Tumors) :cite:`Gao2021` based on integrative
    Bayesian approaches to identify genome-wide aneuploidy at 5MB resolution
    in single cells to separate tumor cells from normal cells, and tumor
    subclones using high-throughput sc-RNAseq data.

    Note on input data from the original authors:

        The matrix values are often the count of unique molecular identifier (UMI)
        from nowadays high throughput single cell RNAseq data. The early generation of
        scRNAseq data may be summarized as TPM values or total read counts,
        which should also work.

    This means that unlike for :func:`infercnvpy.tl.infercnv` the input data
    should not be log-transformed.

    CopyKAT also does NOT require running :func:`infercnvpy.io.genomic_position_from_gtf`,
    it infers the genomic position from the gene symbols in `adata.var_names`.

    You can find more info on GitHub: https://github.com/navinlabcode/copykat

    Parameters
    ----------
    adata
        annotated data matrix
    key_added
        Key under which the copyKAT scores will be stored in `adata.obsm` and `adata.uns`.
    inplace
        If True, store the result in adata, otherwise return it.
    layer
        Layer of `adata` to use as expression matrix. If `None`, `adata.X` is used.
    gene_ids
        gene id type: Symbol ("S") or Ensembl ("E").
    segmentation_cut
        segmentation parameters, input 0 to 1; larger looser criteria.
    distance
        distance methods include "euclidean", and correlation converted distance include "pearson" and "spearman".
    s_name
        sample (output file) name.
    min_genes_chr
        minimal number of genes per chromosome for cell filtering.
    n_jobs
        Number of cores to use for copyKAT analysis. Per default, uses all cores
        available on the system. Multithreading does not work on Windows and this
        value will be ignored.

    Returns
    -------
    Depending on the value of `inplace`, either returns `None` (results are
    stored in `adata.obsm["X_<key_added>"]` and `adata.uns[key_added]`) or a
    numpy array with the copyKAT scores, one row per cell and one column per
    row of copyKAT's `CNAmat`. (The return annotation is kept unchanged for
    backwards compatibility even though no DataFrame is returned.)

    Raises
    ------
    ImportError
        If `rpy2` or the R package `copykat` cannot be imported.
    """
    if n_jobs is None:
        n_jobs = cpu_count()
    # Only POSIX systems get more than one core (see `n_jobs` above).
    if os.name != "posix":
        n_jobs = 1

    # rpy2 is an optional dependency, therefore it is imported lazily.
    try:
        from rpy2.robjects.packages import importr
        from rpy2.robjects import pandas2ri, numpy2ri
        from rpy2.robjects.conversion import localconverter
        from rpy2 import robjects as ro
    except ImportError as err:
        raise ImportError("copyKAT requires rpy2 to be installed. ") from err

    # Only called for its side effect: presumably this makes `copykat()`
    # available to the R code evaluated below. The returned handle is not needed.
    try:
        importr("copykat")
    except ImportError as err:
        raise ImportError(
            "copyKAT requires a valid R installation with the following packages: "
            "copykat"
        ) from err

    logging.info("Preparing R objects")
    with localconverter(ro.default_converter + numpy2ri.converter):
        # Fixed: the layer used to be looked up on an undefined `tmp_adata`.
        expr = adata.X if layer is None else adata.layers[layer]
        # copyKAT expects genes in rows and cells in columns (row/column
        # names are assigned accordingly in the R snippet below).
        expr = expr.T.toarray() if issparse(expr) else expr.T

        r_globals = {
            "expr_r": expr,
            "gene_names": list(adata.var.index),
            "cell_IDs": list(adata.obs.index),
            "n_jobs": n_jobs,
            "gene_ids": gene_ids,
            "segmentation_cut": segmentation_cut,
            "distance": distance,
            "s_name": s_name,
            "min_gene_chr": min_genes_chr,
        }
        for r_name, py_value in r_globals.items():
            ro.globalenv[r_name] = ro.conversion.py2rpy(py_value)

    logging.info("Running copyKAT")
    ro.r(
        """
        rownames(expr_r) <- gene_names
        colnames(expr_r) <- cell_IDs
        copyKAT_run <- copykat(rawmat = expr_r, id.type = gene_ids, ngene.chr = min_gene_chr, win.size = 25,
                                KS.cut = segmentation_cut, sam.name = s_name, distance = distance, norm.cell.names = "",
                                n.cores = n_jobs, output.seg = FALSE)
        copyKAT_result <- copyKAT_run$CNAmat
        colnames(copyKAT_result) <- c('chrom', 'chrompos', 'abspos', cell_IDs)
        """
    )

    with localconverter(
        ro.default_converter + numpy2ri.converter + pandas2ri.converter
    ):
        copyKAT_result = ro.conversion.rpy2py(ro.globalenv["copyKAT_result"])

    # `itertuples()` yields (index label, chrom); after `drop_duplicates()` the
    # label is that of the first row of each chromosome.
    # NOTE(review): whether these labels are 0- or 1-based depends on the
    # R -> pandas conversion -- confirm against consumers of `chr_pos`.
    chrom_pos = {
        "chr_pos": {
            f"chr{chrom}": int(pos)
            for pos, chrom in copyKAT_result.loc[:, ["chrom"]]
            .drop_duplicates()
            .itertuples()
        }
    }

    # Drop the three annotation columns and transpose so that cells are rows.
    scores = copyKAT_result.drop(["chrom", "chrompos", "abspos"], axis=1).values.T

    if inplace:
        adata.uns[key_added] = chrom_pos
        adata.obsm[f"X_{key_added}"] = scores
    else:
        return scores
6 changes: 5 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,13 @@ requires = [
]

[tool.flit.metadata.requires-extra]
copykat = [
'rpy2'
]
test = [
'pytest',
'black'
'black',
'pre-commit',
]
doc = [
'sphinx>=3.0.1,<3.1',
Expand Down

0 comments on commit b86c812

Please sign in to comment.