AleksBekker · AleksBekker · Aug 4, 2023 · Aug 2, 2023 · Aug 3, 2023 · Aug 3, 2023
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,6 @@
+[flake8]
+ignore = E501,W504
+max-line-length = 120
+per-file-ignores = __init__.py:F401
+inline-quotes = single
+multiline-quotes = double
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -9,13 +9,13 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest, windows-latest]
+        os: [ubuntu-latest]
         python-version: ['3.8', '3.9', '3.10']
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
         with:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies

diff --git a/Pipfile b/Pipfile
@@ -8,7 +8,9 @@ anndata = "==0.8.0"
 celltypist = "==1.3.0"
 pandas = "==2.0.0"
 pydantic = ">=2.0"
-scanpy = "==1.9.3"
+scanpy = "*"
+scvi-tools = "*"
+scarches = "*"
 
 [dev-packages]
 flake8 = "*"
@@ -17,6 +19,7 @@ mypy = "*"
 pytest = "*"
 pytest-cov = "*"
 tox = "*"
+isort = "*"
 
 [requires]
 python_version = "3.8"

diff --git a/Pipfile.lock b/Pipfile.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,63 @@
 [build-system]
-requires = ["setuptools>=42.0", "wheel"]
+requires = ["setuptools>=61.0", "wheel"]
 build-backend = "setuptools.build_meta"
 
+[project]
+name = "macta"
+version = "0.0.4"
+authors = [{ name = "Aleksandr Bekker", email = "alekso.bekker@gmail.com" }]
+description = "Annotation tools for the MACTA suite"
+keywords = ["cell type annotation", "single cell"]
+readme = "README.md"
+requires-python = ">=3.8"
+license = { text = "GPL3" }
+classifiers = [
+    "Programming Language :: Python :: 3 :: Only",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+]
+dependencies = [
+    "anndata>=0.8.0",
+    "pandas>=2.0.0",
+    "pydantic>=2.0.0",
+    "scanpy>=1.9.3",
+]
+
+[project.urls]
+"Homepage" = "https://github.com/AleksBekker/MACTA_py"
+
+[project.optional-dependencies]
+
+# Per-tool extras
+celltypist = ["celltypist>=1.3.0"]
+scanvi = ["scvi-tools", "scarches"]
+
+# Per-version extras
+py38 = ["macta[celltypist]"]
+py39 = ["macta[celltypist]"]
+py310 = ["macta[celltypist]"]
+torch38 = ["macta[scanvi]"]
+
+# Testing extras
+testing = [
+    "flake8",
+    "flake8-quotes",
+    "isort",
+    "mypy",
+    "pytest",
+    "pytest-cov",
+    "tox",
+]
+
+[tool.setuptools.packages.find]
+where = ["src"]
+include = ["macta*"]
+namespaces = false
+
+[tool.setuptools.package-data]
+"macta" = ["py.typed"]  # key must be the real package name so the PEP 561 marker is shipped
+
 [tool.pytest.ini_options]
 addopts = "--cov=macta"
 testpaths = ["tests"]
@@ -19,3 +75,6 @@ warn_return_any = true
 warn_unreachable = true
 warn_unused_configs = true
 no_implicit_reexport = true
+
+[tool.isort]
+line_length = 120
diff --git a/scripts/install_in_conda.sh b/scripts/install_in_conda.sh
diff --git a/scripts/install_in_conda_editable.sh b/scripts/install_in_conda_editable.sh
diff --git a/setup.cfg b/setup.cfg
diff --git a/setup.py b/setup.py
diff --git a/src/macta/__init__.py b/src/macta/__init__.py
@@ -1,4 +1,4 @@
-from macta._annotate import annotate
 from macta import tools, utils
+from macta._annotate import annotate
 
 __all__ = ['annotate', 'tools', 'utils']
diff --git a/src/macta/_annotate.py b/src/macta/_annotate.py
@@ -1,17 +1,18 @@
 """Encloses the `annotate` function, which runs all necessary annotation tools."""
 
 import logging
-from typing import Any, Dict, Optional, Union, Container
+from typing import Any, Container, Dict, Optional, Union
 
 import pandas as pd
 from anndata import AnnData
 
-from macta.tools import CelltypistInterface, CTAToolInterface
+from macta.tools import AVAILABLE, CTAToolInterface
 
 
 def annotate(expr_data: AnnData, ref_data: Union[AnnData, pd.DataFrame], annot_type: str, result_type: str = 'labels',
              annot_tools: Union[str, Container[str]] = '*',
-             tool_interfaces: Optional[Dict[str, CTAToolInterface]] = None, **kwargs: Any) -> pd.DataFrame:
+             tool_interfaces: Optional[Dict[str, CTAToolInterface]] = None, **kwargs: Any
+             ) -> Dict[str, Union[pd.Series, pd.DataFrame]]:
     """Runs MACTA annotation analysis.
 
     Arguments:
@@ -27,22 +28,22 @@ def annotate(expr_data: AnnData, ref_data: Union[AnnData, pd.DataFrame], annot_t
     """
 
     if tool_interfaces is None:
-        tool_interfaces = {'celltypist': CelltypistInterface()}
+        tool_interfaces = AVAILABLE
 
     if annot_tools == '*':
         annot_tools = list(tool_interfaces.keys())
 
-    df = pd.DataFrame()
+    results = {}
 
     for tool_name, interface in tool_interfaces.items():
         if tool_name not in annot_tools:
             continue
 
         result = run_tool(tool_name, interface, expr_data, ref_data, annot_type, result_type, **kwargs)
         if result is not None:
-            df[tool_name] = result
+            results[tool_name] = result
 
-    return df
+    return results
 
 
 def run_tool(tool_name: str, interface: CTAToolInterface, expr_data: AnnData, ref_data: AnnData, annot_type: str,
@@ -62,19 +63,20 @@ def run_tool(tool_name: str, interface: CTAToolInterface, expr_data: AnnData, re
         A `pandas.Series` containing the results for each cell type if the run is valid. Otherwise, returns `None`
     """
 
-    if interface.requirements is None:
+    if interface._requirements is None:
         logging.warn(f'{tool_name}: no requirements available. Proceeding with run.')
         return None
 
-    if not interface.requirements.check(expr_data=expr_data, ref_data=ref_data, annot_type=annot_type,
-                                        result_type=result_type, **kwargs):
+    if not interface._requirements.check(expr_data=expr_data, ref_data=ref_data, annot_type=annot_type,
+                                         result_type=result_type, **kwargs):
         logging.warn(f'{tool_name}: incompatible requirements. Skipping this tool.')
         return None
 
     try:
         return interface.run_full(expr_data, ref_data, result_type, **kwargs)
 
     except ImportError:
+        # NOTE this should never occur if the tool_interfaces are set according to what can be imported
         logging.error(f'{tool_name}: required packages not imported. Skipping this tool.')
 
     except Exception as e:

diff --git a/src/macta/tools/__init__.py b/src/macta/tools/__init__.py
@@ -1,5 +1,18 @@
+from contextlib import suppress
+from typing import Dict
+
 from macta.tools._cta_tool_interface import CTAToolInterface
 
-from macta.tools._celltypist_interface import CelltypistInterface
+AVAILABLE: Dict[str, CTAToolInterface] = {}  # tool name -> interface instance; filled in by the blocks below
+__all__ = ['AVAILABLE', 'CTAToolInterface']  # extended below with each interface class that imports cleanly
+
+
+with suppress(ImportError):  # an ImportError (e.g. celltypist not installed) skips the rest of this block
+    from macta.tools._celltypist_interface import CelltypistInterface
+    __all__.append('CelltypistInterface')
+    AVAILABLE['celltypist'] = CelltypistInterface()
 
-__all__ = ['CTAToolInterface', 'CelltypistInterface']
+with suppress(ImportError):  # an ImportError skips the rest of this block, leaving scanvi unregistered
+    from macta.tools._scanvi import ScanviInterface
+    __all__.append('ScanviInterface')
+    AVAILABLE['scanvi'] = ScanviInterface()
diff --git a/src/macta/tools/_celltypist_interface.py b/src/macta/tools/_celltypist_interface.py
@@ -10,8 +10,8 @@
 from celltypist import models
 from celltypist.classifier import AnnotationResult
 
-from macta.tools import CTAToolInterface
-from macta.utils import requirements as rqs
+from macta.tools._cta_tool_interface import CTAToolInterface
+from macta.utils.requirements import EqualityRequirement, RequirementList
 
 # Disable `celltypist`'s trivial output logs
 logging.getLogger(celltypist.__name__).setLevel(logging.ERROR)
@@ -21,12 +21,9 @@
 class CelltypistInterface(CTAToolInterface):
     """Class for interfacing with the `celltypist` tool"""
 
-    # TODO: possibly do this using abstract properties
-    # Define requirements
-    def __post_init__(self) -> None:
-        self.requirements = rqs.RequirementList(
-            annot_type=rqs.EqualityRequirement('ref'),
-        )
+    _requirements = RequirementList(
+        annot_type=EqualityRequirement('ref'),
+    )
 
     def annotate(self, expr_data: AnnData, ref_data: models.Model, **kwargs: Any) -> AnnotationResult:
         """Runs annotation using `celltypist`.
@@ -40,33 +37,47 @@ def annotate(self, expr_data: AnnData, ref_data: models.Model, **kwargs: Any) ->
         """
         return celltypist.annotate(expr_data, model=ref_data, majority_voting=True)
 
-    def convert(self, results: AnnotationResult, convert_to: str, **kwargs: Any) -> pd.Series:
+    def convert(self, results: AnnotationResult, convert_to: str, **kwargs: Any) -> Union[pd.DataFrame, pd.Series]:
         """Converts `celltypist` results to standardized format.
 
         Arguments:
             results (AnnotationResult): celltypist results
-            convert_to (str): format to which `res` will be converted
+            convert_to (str): output format, 'labels' or 'scores'; any other value raises `ValueError`
 
         Returns:
-            `pandas.Series` object containing data in the `convert_to` format
+            `results.predicted_labels.majority_voting` for 'labels'; `results.probability_matrix` for 'scores'
         """
 
         if convert_to == 'labels':
             return results.predicted_labels.majority_voting
 
+        if convert_to == 'scores':
+            return results.probability_matrix
+
         raise ValueError(f'{convert_to} is an invalid option for `convert_to`')
 
-    def preprocess_ref(self, ref_data: Union[AnnData, str], **kwargs: Any) -> models.Model:
+    def preprocess_ref(self, ref_data: Union[AnnData, str], update_models: bool = True, force_update: bool = True,
+                       **kwargs: Any) -> models.Model:
+        """Preprocesses the reference data into a `celltypist.models.Model`.
+
+        Arguments:
+            ref_data (Union[AnnData, str]): raw reference data. Can be an `AnnData` on which to train the model or a
+                `str` which represents the celltypist model name to load.
+            update_models (bool): if `True`, updates the celltypist model cache
+            force_update (bool): passed along directly to `celltypist.models.download_models`
+
+        Returns:
+            `celltypist.models.Model` to be used for annotation
 
-        kwargs = {
-            'update_models': True,
-            'force_update': True,
-            **kwargs
-        }
+        Notes:
+            - When `ref_data` is a `str`, this function force-updates all celltypist models. This will result in a
+                substantial delay while the data is being downloaded
+            - TODO add another case of ref_data: loading a trained model from a file
+        """
 
         if isinstance(ref_data, str):
-            if kwargs['update_models']:
-                models.download_models(force_update=kwargs['force_update'])
+            if update_models:
+                models.download_models(force_update=force_update)
             return models.Model.load(model=ref_data)
 
         elif isinstance(ref_data, AnnData):