Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nikolaos integration #35

Merged
merged 20 commits into from
Aug 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[flake8]
ignore = E501,W504
max-line-length = 120
per-file-ignores = __init__.py:F401
inline-quotes = single
multiline-quotes = double
6 changes: 3 additions & 3 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest]
os: [ubuntu-latest]
python-version: ['3.8', '3.9', '3.10']

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand Down
5 changes: 4 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ anndata = "==0.8.0"
celltypist = "==1.3.0"
pandas = "==2.0.0"
pydantic = ">=2.0"
scanpy = "==1.9.3"
scanpy = "*"
scvi-tools = "*"
scarches = "*"

[dev-packages]
flake8 = "*"
Expand All @@ -17,6 +19,7 @@ mypy = "*"
pytest = "*"
pytest-cov = "*"
tox = "*"
isort = "*"

[requires]
python_version = "3.8"
Expand Down
1,254 changes: 1,199 additions & 55 deletions Pipfile.lock

Large diffs are not rendered by default.

61 changes: 60 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,63 @@
[build-system]
requires = ["setuptools>=42.0", "wheel"]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "macta"
version = "0.0.4"
authors = [{ name = "Aleksandr Bekker", email = "alekso.bekker@gmail.com" }]
description = "Annotation tools for the MACTA suite"
keywords = ["cell type annotation", "single cell"]
readme = "README.md"
requires-python = ">=3.8"
license = { text = "GPL3" }
classifiers = [
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
]
dependencies = [
"anndata>=0.8.0",
"pandas>=2.0.0",
"pydantic>=2.0.0",
"scanpy>=1.9.3",
]

[project.urls]
"Homepage" = "https://github.com/AleksBekker/MACTA_py"

[project.optional-dependencies]

# Per-tool extras
celltypist = ["celltypist>=1.3.0"]
scanvi = ["scvi-tools", "scarches"]

# Per-version extras
py38 = ["macta[celltypist]"]
py39 = ["macta[celltypist]"]
py310 = ["macta[celltypist]"]
torch38 = ["macta[scanvi]"]

# Testing extras
testing = [
"flake8",
"flake8-quotes",
"isort",
"mypy",
"pytest",
"pytest-cov",
"tox",
]

[tool.setuptools.packages.find]
where = ["src"]
include = ["macta*"]
namespaces = false

[tool.setuptools.package-data]
# Keys in this table are importable package names. The package discovered under
# `src/` is `macta`, so the PEP 561 `py.typed` marker must be registered under
# that name to be included in built distributions.
# NOTE(review): assumes `src/macta/py.typed` exists — confirm.
"macta" = ["py.typed"]

[tool.pytest.ini_options]
addopts = "--cov=macta"
testpaths = ["tests"]
Expand All @@ -19,3 +75,6 @@ warn_return_any = true
warn_unreachable = true
warn_unused_configs = true
no_implicit_reexport = true

[tool.isort]
line_length = 120
5 changes: 0 additions & 5 deletions scripts/install_in_conda.sh

This file was deleted.

5 changes: 0 additions & 5 deletions scripts/install_in_conda_editable.sh

This file was deleted.

51 changes: 0 additions & 51 deletions setup.cfg

This file was deleted.

4 changes: 0 additions & 4 deletions setup.py

This file was deleted.

2 changes: 1 addition & 1 deletion src/macta/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from macta._annotate import annotate
from macta import tools, utils
from macta._annotate import annotate

__all__ = ['annotate', 'tools', 'utils']
22 changes: 12 additions & 10 deletions src/macta/_annotate.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
"""Encloses the `annotate` function, which runs all necessary annotation tools."""

import logging
from typing import Any, Dict, Optional, Union, Container
from typing import Any, Container, Dict, Optional, Union

import pandas as pd
from anndata import AnnData

from macta.tools import CelltypistInterface, CTAToolInterface
from macta.tools import AVAILABLE, CTAToolInterface


def annotate(expr_data: AnnData, ref_data: Union[AnnData, pd.DataFrame], annot_type: str, result_type: str = 'labels',
annot_tools: Union[str, Container[str]] = '*',
tool_interfaces: Optional[Dict[str, CTAToolInterface]] = None, **kwargs: Any) -> pd.DataFrame:
tool_interfaces: Optional[Dict[str, CTAToolInterface]] = None, **kwargs: Any
) -> Dict[str, Union[pd.Series, pd.DataFrame]]:
"""Runs MACTA annotation analysis.

Arguments:
Expand All @@ -27,22 +28,22 @@ def annotate(expr_data: AnnData, ref_data: Union[AnnData, pd.DataFrame], annot_t
"""

if tool_interfaces is None:
tool_interfaces = {'celltypist': CelltypistInterface()}
tool_interfaces = AVAILABLE

if annot_tools == '*':
annot_tools = list(tool_interfaces.keys())

df = pd.DataFrame()
results = {}

for tool_name, interface in tool_interfaces.items():
if tool_name not in annot_tools:
continue

result = run_tool(tool_name, interface, expr_data, ref_data, annot_type, result_type, **kwargs)
if result is not None:
df[tool_name] = result
results[tool_name] = result

return df
return results


def run_tool(tool_name: str, interface: CTAToolInterface, expr_data: AnnData, ref_data: AnnData, annot_type: str,
Expand All @@ -62,19 +63,20 @@ def run_tool(tool_name: str, interface: CTAToolInterface, expr_data: AnnData, re
A `pandas.Series` containing the results for each cell type if the run is valid. Otherwise, returns `None`
"""

if interface.requirements is None:
if interface._requirements is None:
logging.warn(f'{tool_name}: no requirements available. Proceeding with run.')
return None

if not interface.requirements.check(expr_data=expr_data, ref_data=ref_data, annot_type=annot_type,
result_type=result_type, **kwargs):
if not interface._requirements.check(expr_data=expr_data, ref_data=ref_data, annot_type=annot_type,
result_type=result_type, **kwargs):
logging.warn(f'{tool_name}: incompatible requirements. Skipping this tool.')
return None

try:
return interface.run_full(expr_data, ref_data, result_type, **kwargs)

except ImportError:
# NOTE: this should never occur as long as `tool_interfaces` only contains tools whose packages could be imported
logging.error(f'{tool_name}: required packages not imported. Skipping this tool.')

except Exception as e:
Expand Down
17 changes: 15 additions & 2 deletions src/macta/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
from contextlib import suppress
from typing import Dict

from macta.tools._cta_tool_interface import CTAToolInterface

from macta.tools._celltypist_interface import CelltypistInterface
AVAILABLE: Dict[str, CTAToolInterface] = {}
__all__ = ['AVAILABLE', 'CTAToolInterface']


with suppress(ImportError):
from macta.tools._celltypist_interface import CelltypistInterface
__all__.append('CelltypistInterface')
AVAILABLE['celltypist'] = CelltypistInterface()

__all__ = ['CTAToolInterface', 'CelltypistInterface']
with suppress(ImportError):
from macta.tools._scanvi import ScanviInterface
__all__.append('ScanviInterface')
AVAILABLE['scanvi'] = ScanviInterface()
49 changes: 30 additions & 19 deletions src/macta/tools/_celltypist_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
from celltypist import models
from celltypist.classifier import AnnotationResult

from macta.tools import CTAToolInterface
from macta.utils import requirements as rqs
from macta.tools._cta_tool_interface import CTAToolInterface
from macta.utils.requirements import EqualityRequirement, RequirementList

# Disable `celltypist`'s trivial output logs
logging.getLogger(celltypist.__name__).setLevel(logging.ERROR)
Expand All @@ -21,12 +21,9 @@
class CelltypistInterface(CTAToolInterface):
"""Class for interfacing with the `celltypist` tool"""

# TODO: possibly do this using abstract properties
# Define requirements
def __post_init__(self) -> None:
self.requirements = rqs.RequirementList(
annot_type=rqs.EqualityRequirement('ref'),
)
_requirements = RequirementList(
annot_type=EqualityRequirement('ref'),
)

def annotate(self, expr_data: AnnData, ref_data: models.Model, **kwargs: Any) -> AnnotationResult:
"""Runs annotation using `celltypist`.
Expand All @@ -40,33 +37,47 @@ def annotate(self, expr_data: AnnData, ref_data: models.Model, **kwargs: Any) ->
"""
return celltypist.annotate(expr_data, model=ref_data, majority_voting=True)

def convert(self, results: AnnotationResult, convert_to: str, **kwargs: Any) -> pd.Series:
def convert(self, results: AnnotationResult, convert_to: str, **kwargs: Any) -> Union[pd.DataFrame, pd.Series]:
"""Converts `celltypist` results to standardized format.

Arguments:
results (AnnotationResult): celltypist results
convert_to (str): format to which `res` will be converted
convert_to (str): format to which `results` will be converted

Returns:
`pandas.Series` object containing data in the `convert_to` format
`pandas` object containing data in the `convert_to` format
"""

if convert_to == 'labels':
return results.predicted_labels.majority_voting

if convert_to == 'scores':
return results.probability_matrix

raise ValueError(f'{convert_to} is an invalid option for `convert_to`')

def preprocess_ref(self, ref_data: Union[AnnData, str], **kwargs: Any) -> models.Model:
def preprocess_ref(self, ref_data: Union[AnnData, str], update_models: bool = True, force_update: bool = True,
**kwargs: Any) -> models.Model:
"""Preprocesses the reference data into a `celltypist.models.Model`.

Arguments:
ref_data (Union[AnnData, str]): raw reference data. Can be an `AnnData` on which to train the model or a
`str` which represents the celltypist model name to load.
update_models (bool): if `True`, updates the celltypist model cache
force_update (bool): passed along directly to `celltypist.models.download_models`

Returns:
`celltypist.models.Model` to be used for annotation

kwargs = {
'update_models': True,
'force_update': True,
**kwargs
}
Notes:
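- When `ref_data` is a `str` and `update_models` is `True` (the default), this function calls
`celltypist.models.download_models(force_update=force_update)` before loading the model. With the default
`force_update=True` this force-updates all celltypist models, which will result in a substantial delay while the
data is being downloaded. Pass `update_models=False` to load from the existing model cache instead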
- TODO: support another kind of `ref_data`: loading a trained model from a file
"""

if isinstance(ref_data, str):
if kwargs['update_models']:
models.download_models(force_update=kwargs['force_update'])
if update_models:
models.download_models(force_update=force_update)
return models.Model.load(model=ref_data)

elif isinstance(ref_data, AnnData):
Expand Down
Loading