Skip to content

Commit

Permalink
Update project structure
Browse files Browse the repository at this point in the history
  • Loading branch information
breimanntools committed Jun 29, 2024
1 parent a47b038 commit 42142e8
Show file tree
Hide file tree
Showing 15 changed files with 62 additions and 48 deletions.
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ If you use AAanalysis in your work, please cite the respective publication as fo

**CPP**:
Breimann and Kamp *et al.* (2024c),
*Complete γ-secretase substrate proteome revealed by explainable AI*, .. # Link if available
*Charting γ-secretase substrates by explainable AI*, .. # Link if available

**dPULearn**:
Breimann and Kamp *et al.* (2024c),
*Complete γ-secretase substrate proteome revealed by explainable AI*, .. # Link if available
*Charting γ-secretase substrates by explainable AI*, .. # Link if available
18 changes: 8 additions & 10 deletions aaanalysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
SequencePreprocessor)
from .feature_engineering import AAclust, AAclustPlot, SequenceFeature, NumericalFeature, CPP, CPPPlot
from .pu_learning import dPULearn, dPULearnPlot
from .explainable_ai import TreeModel
from .pertubation import AAMut, AAMutPlot, SeqMut, SeqMutPlot
from .plotting import (plot_get_clist, plot_get_cmap, plot_get_cdict,
plot_settings, plot_legend, plot_gcfs)
Expand Down Expand Up @@ -32,7 +33,7 @@
"AAMutPlot",
"SeqMut",
"SeqMutPlot",
# "TreeModel" # SHAP (explainable AI module)
"TreeModel",
# "ShapExplainer" # SHAP
"plot_get_clist",
"plot_get_cmap",
Expand All @@ -49,12 +50,11 @@

# Import of professional (pro) version features if dependencies are available
try:
from .explainable_ai import TreeModel, ShapExplainer
from .explainable_ai_pro import ShapExplainer
from .data_handling_pro import comp_seq_sim, filter_seq
from .show_html import display_df
# Extend the __all__ list with pro features if successful
__all__.extend(["TreeModel",
"ShapExplainer",
__all__.extend(["ShapExplainer",
"display_df",
"comp_seq_sim",
"filter_seq"])
Expand All @@ -76,9 +76,7 @@ def __call__(self, *args, **kwargs):
return UnavailableFeature

# Use the factory function to create placeholders for pro features
make_pro_feature("TreeModel")
make_pro_feature("ShapExplainer")
make_pro_feature("display_df")
make_pro_feature("comp_seq_sim")
make_pro_feature("comp_pw_seq_sim")
make_pro_feature("filter_seq")
ShapExplainer = make_pro_feature("ShapExplainer")
display_df = make_pro_feature("display_df")
comp_seq_sim = make_pro_feature("comp_seq_sim")
filter_seq = make_pro_feature("filter_seq")
10 changes: 9 additions & 1 deletion aaanalysis/_utils/check_models.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""This is a script for scikit-learn model-specific check functions"""
import inspect
from inspect import isclass
import numpy as np

from ._utils import add_str

# Helper functions


Expand Down Expand Up @@ -72,3 +72,11 @@ def check_model_kwargs(model_class=None, model_kwargs=None, name_model_class="mo
if "random_state" not in model_kwargs and "random_state" in valid_args:
model_kwargs.update(dict(random_state=random_state))
return model_kwargs


def check_match_list_model_classes_kwargs(list_model_classes=None, list_model_kwargs=None):
    """Check that 'list_model_classes' and 'list_model_kwargs' have the same length.

    Parameters
    ----------
    list_model_classes
        Sized collection of model classes; only its length is inspected here.
    list_model_kwargs
        Sized collection of keyword-argument containers, one expected per model class.

    Raises
    ------
    ValueError
        If the two collections differ in length.
    """
    n_models = len(list_model_classes)
    n_args = len(list_model_kwargs)
    if n_models != n_args:
        # Report both counts in the same '(n=...)' form so the message is well-formed
        raise ValueError(f"Length of 'list_model_kwargs' (n={n_args}) should match to "
                         f"'list_model_classes' (n={n_models})")
2 changes: 1 addition & 1 deletion aaanalysis/data_handling_pro/_filter_seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,9 @@ def filter_seq(df_seq: pd.DataFrame = None,
Warnings
--------
* This function requires `biopython`, which is automatically installed via `pip install aaanalysis[pro]`.
* CD-HIT and MMseqs2 must be installed separately.
* CD-HIT is not available for Windows.
* This function requires `biopython`, which is automatically installed via `pip install aaanalysis[pro]`.
Examples
--------
Expand Down
2 changes: 0 additions & 2 deletions aaanalysis/explainable_ai/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from ._shap_explainer import ShapExplainer
from ._tree_model import TreeModel

__all__ = [
"ShapExplainer",
"TreeModel",
]
11 changes: 1 addition & 10 deletions aaanalysis/explainable_ai/_backend/check_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,11 @@
import aaanalysis.utils as ut


def check_match_list_model_classes_kwargs(list_model_classes=None, list_model_kwargs=None):
    """Check that 'list_model_classes' and 'list_model_kwargs' have the same length.

    Parameters
    ----------
    list_model_classes
        Sized collection of model classes; only its length is inspected here.
    list_model_kwargs
        Sized collection of keyword-argument containers, one expected per model class.

    Raises
    ------
    ValueError
        If the two collections differ in length.
    """
    n_models = len(list_model_classes)
    n_args = len(list_model_kwargs)
    if n_models != n_args:
        # Report both counts in the same '(n=...)' form so the message is well-formed
        raise ValueError(f"Length of 'list_model_kwargs' (n={n_args}) should match to "
                         f"'list_model_classes' (n={n_models})")


def check_match_labels_X(labels=None, X=None):
"""Check if labels binary classification task labels"""
n_samples = X.shape[0]
# Accept float if fuzzy_labeling is True
str_add = "Consider setting 'fuzzy_labeling=True'."
labels = ut.check_labels(labels=labels, len_requiered=n_samples, str_add=str_add)
labels = ut.check_labels(labels=labels, len_requiered=n_samples)
unique_labels = set(labels)
if len(unique_labels) != 2:
raise ValueError(f"'labels' should contain 2 unique labels ({unique_labels})")
Expand Down
8 changes: 2 additions & 6 deletions aaanalysis/explainable_ai/_tree_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
This is a script for the frontend of the TreeModel class used to obtain Monte Carlo estimates of feature importance.
DEV: TODO features
a) TreeModel.fit: Add n_jobs as input
b) TreeModel.eval: Add n_features to output
"""
from typing import Optional, Dict, List, Tuple, Type, Union, Callable
Expand All @@ -13,8 +12,7 @@

import aaanalysis.utils as ut

from ._backend.check_models import (check_match_list_model_classes_kwargs,
check_match_labels_X,
from ._backend.check_models import (check_match_labels_X,
check_match_X_is_selected)
from ._backend.tree_model.tree_model_fit import fit_tree_based_models
from ._backend.tree_model.tree_model_predict_proba import monte_carlo_predict_proba
Expand Down Expand Up @@ -115,8 +113,6 @@ def check_match_df_feat_importance_arrays(df_feat=None, feat_importance=None, fe


# TODO split from shap explainer to be installed via aaanalysis (not aaanalysis[pro])
# TODO manage aaanalysis[pro] (add info/warning in docu for every function/module whose dependencies are not installed)
# TODO e.g., seq_filter, comp_seq_sim, SHAP ...# II Main Functions
class TreeModel:
"""
Tree Model class: A wrapper for tree-based models to obtain Monte Carlo estimates of feature
Expand Down Expand Up @@ -196,7 +192,7 @@ def __init__(self,
list_model_kwargs = ut.check_list_like(name="list_model_kwargs", val=list_model_kwargs, accept_none=True)
if list_model_kwargs is None:
list_model_kwargs = [{} for _ in list_model_classes]
check_match_list_model_classes_kwargs(list_model_classes=list_model_classes, list_model_kwargs=list_model_kwargs)
ut.check_match_list_model_classes_kwargs(list_model_classes=list_model_classes, list_model_kwargs=list_model_kwargs)
_list_model_kwargs = []
for model_class, model_kwargs in zip(list_model_classes, list_model_kwargs):
ut.check_mode_class(model_class=model_class)
Expand Down
2 changes: 0 additions & 2 deletions aaanalysis/explainable_ai_pro/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from ._shap_explainer import ShapExplainer
from ._tree_model import TreeModel

__all__ = [
"ShapExplainer",
"TreeModel",
]
3 changes: 2 additions & 1 deletion aaanalysis/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@
check_file_path_exists,
check_is_fasta)
from ._utils.check_models import (check_mode_class,
check_model_kwargs)
check_model_kwargs,
check_match_list_model_classes_kwargs)
from ._utils.check_plots import (check_fig,
check_ax,
check_figsize,
Expand Down
1 change: 1 addition & 0 deletions docs/requirements_dev.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Requires Python >= 3.9

# Additional dependencies for professional version (pip install aaanalysis[pro])
biopython>=1.8.3
shap>=0.44.0
ipython>=8.16.1
hypothesis>=6.86.2
Expand Down
4 changes: 2 additions & 2 deletions docs/source/index/citations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ If you use **AAanalysis** in your work, please cite the respective publication a

**CPP**:
[Breimann24c]_ Breimann and Kamp *et al.* (2024c),
*Complete γ-secretase substrate proteome revealed by explainable AI*, .. # Link if available
*Charting γ-secretase substrates by explainable AI*, .. # Link if available

**dPULearn**:
[Breimann24c]_ Breimann and Kamp *et al.* (2024c),
*Complete γ-secretase substrate proteome revealed by explainable AI*, .. # Link if available
*Charting γ-secretase substrates by explainable AI*, .. # Link if available
3 changes: 1 addition & 2 deletions docs/source/index/references.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ AAanalysis Algorithms
`bioRxiv <https://www.biorxiv.org/content/10.1101/2023.08.03.551768v1>`__.
.. [Breimann24c] Breimann and Kamp *et al.* (2024c),
*Complete γ-secretase substrate proteome revealed by explainable AI*,
.. # Link if available
*Charting γ-secretase substrates by explainable AI*, .. # Link if available
Sequence Algorithms
-------------------
Expand Down
4 changes: 2 additions & 2 deletions examples/data_handling/sp_get_aa_window.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
}
],
"source": [
"# Get amino acid window of size 5\n",
"# Get amino acid window of size 6\n",
"window = sp.get_aa_window(seq=seq, pos_stop=5)\n",
"print(window)"
],
Expand Down Expand Up @@ -88,7 +88,7 @@
}
],
"source": [
"# Get amino acid window of size 3\n",
"# Get amino acid window of size 4\n",
"window = sp.get_aa_window(seq=seq, pos_start=2, pos_stop=5)\n",
"print(window)"
],
Expand Down
36 changes: 30 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,15 @@ description = "Python framework for interpretable protein prediction"
authors = ["Stephan Breimann <stephanbreimann@gmail.de>"]
license = "BSD-3-Clause"
readme = "README.rst"
include = ["data/*.xlsx", "data/benchmarks/*.tsv", "data/benchmarks/*.xlsx"]
homepage = "https://aaanalysis.readthedocs.io"
keywords = ["protein", "prediction", "bioinformatics", "machine learning", "interpretable AI"]

# Include additional files
include = [
"data/*.xlsx",
"data/benchmarks/*.tsv",
"data/benchmarks/*.xlsx"
]

# Add classifiers to provide more details about the package (used by PyPI)
classifiers = [
Expand All @@ -36,7 +44,6 @@ classifiers = [
[tool.poetry.dependencies]
# Core dependencies (via pip install aaanalysis)
python = "^3.9"
biopython = "^1.8.3"
cycler = "^0.11.0"
et-xmlfile = "^1.1.0"
fonttools = "^4.37.1"
Expand All @@ -59,23 +66,24 @@ scipy = "^1.10.0"
seaborn = "^0.13.2"
six = "^1.16.0"
threadpoolctl = "^3.1.0"

# Optional professional dependencies (via pip install aaanalysis[pro])
biopython = { version = "^1.8.3", optional = true }
shap = { version = "^0.44.0", optional = true }
ipython = { version = "^8.16.1", optional = true }
hypothesis = { version = "^6.86.2", optional = true }
pytest = { version = "^7.4.2", optional = true }
UpSetPlot = { version = "^0.8.0", optional = true }

[tool.poetry.extras]
pro = ["shap", "ipython", "hypothesis", "pytest", "UpSetPlot"]
pro = ["shap", "biopython", "ipython", "hypothesis", "pytest", "UpSetPlot"]

# Project URLs
[tool.poetry.urls]
"Repository" = "https://github.com/breimanntools/aaanalysis"
"Documentation" = "https://aaanalysis.readthedocs.io"

# If you use a tool for linting or formatting, you can add its configurations here.
# For example, if you use `black` for formatting:
# Configuration for black (code formatter)
[tool.black]
line-length = 88
exclude = '''
Expand All @@ -85,8 +93,24 @@ exclude = '''
| \.eggs
| \.mypy_cache
| \.pytest_cache
| \__pycache__
| __pycache__
| build
| dist
)/
'''

# Configuration for isort (import sorting)
[tool.isort]
profile = "black"
line_length = 88

# Configuration for flake8 (linting)
[tool.flake8]
max-line-length = 88
exclude = '''
.git,
__pycache__,
build,
dist,
venv
'''
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Requires Python >= 3.9

# Dependencies for core version (pip install aaanalysis)
biopython>=1.8.3
cycler>=0.11.0
et-xmlfile>=1.1.0
fonttools>=4.37.1
Expand All @@ -26,6 +25,7 @@ six>=1.16.0
threadpoolctl>=3.1.0

# Additional dependencies for professional version (pip install aaanalysis[pro])
biopython>=1.8.3
shap>=0.44.0
ipython>=8.16.1
hypothesis>=6.86.2
Expand Down

0 comments on commit 42142e8

Please sign in to comment.