diff --git a/docs/src/_static/ap.png b/docs/src/_static/ap.png index 33d7277..4a3ea63 100644 Binary files a/docs/src/_static/ap.png and b/docs/src/_static/ap.png differ diff --git a/docs/src/_static/nc_ec50.png b/docs/src/_static/nc_ec50.png new file mode 100644 index 0000000..3e28ef8 Binary files /dev/null and b/docs/src/_static/nc_ec50.png differ diff --git a/docs/src/_static/nc_multiomics.png b/docs/src/_static/nc_multiomics.png new file mode 100644 index 0000000..a63940a Binary files /dev/null and b/docs/src/_static/nc_multiomics.png differ diff --git a/docs/src/_static/nc_offtarget.png b/docs/src/_static/nc_offtarget.png new file mode 100644 index 0000000..1a6ad16 Binary files /dev/null and b/docs/src/_static/nc_offtarget.png differ diff --git a/docs/src/_static/nc_pathway.png b/docs/src/_static/nc_pathway.png new file mode 100644 index 0000000..8bc73da Binary files /dev/null and b/docs/src/_static/nc_pathway.png differ diff --git a/docs/src/_static/ppr.png b/docs/src/_static/ppr.png index 16fce71..6e065a7 100644 Binary files a/docs/src/_static/ppr.png and b/docs/src/_static/ppr.png differ diff --git a/docs/src/_static/reach.png b/docs/src/_static/reach.png index 1d41bab..c51e8c4 100644 Binary files a/docs/src/_static/reach.png and b/docs/src/_static/reach.png differ diff --git a/docs/src/_static/sign.png b/docs/src/_static/sign.png index 9359ca2..2cca343 100644 Binary files a/docs/src/_static/sign.png and b/docs/src/_static/sign.png differ diff --git a/docs/src/_static/sp.png b/docs/src/_static/sp.png index 1e48519..4045304 100644 Binary files a/docs/src/_static/sp.png and b/docs/src/_static/sp.png differ diff --git a/docs/src/benchmarks.rst b/docs/src/benchmarks.rst new file mode 100644 index 0000000..8067e9e --- /dev/null +++ b/docs/src/benchmarks.rst @@ -0,0 +1,106 @@ +##################### +Evaluation strategies +##################### + +One of the main aims of NetworkCommons is to provide a comprehensive set of metrics to evaluate the performance of the 
different network inference methodologies. +Currently, we provide four different evaluation strategies. +If you want to contribute your own, please check our :doc:`Contribution guidelines `. + +.. _eval-offtarget: + +------------------ +Offtarget recovery +------------------ + +**Data**: perturbational scenarios, e.g. a drug perturbation, for which there are differential expression profiles between control and drug-perturbed samples. +See :ref:`PANACEA `. + +**Assumption**: In this setting, we assume that, in a perturbational context, the effects that are measured via omics data are not only a product of the perturbation origin +(e.g. KO, KD, drug perturbation), but also of other origins of perturbation that are not directly targeted by the perturbation agent (e.g. a drug offtarget). + +.. image:: ./_static/nc_offtarget.png + :alt: Evaluation based on offtarget recovery + :width: 1000px + +**Performance metric:** Share (%) of offtargets recovered in the solution network. + +.. note:: + Methods that recover a higher share of offtargets, compared to a random control, will be more successful in contextualising the perturbation, since the method incorporates + the offtargets' effect. + +**Example:** :doc:`Vignette 1: A simple example ` + +.. _eval-ec50: + +------------------------------------------------ +Phosphorylation sensitivity to drug perturbation +------------------------------------------------ + +**Data**: phosphoproteomics dose-response curves, EC50 values, time-course data. +See :ref:`DecryptM `. + +**Assumption**: In this setting, we assume that, in a perturbational context, those elements in a network that are more sensitive to a perturbation (have a lower EC50) will be more +important in the contextualisation of said perturbation. + +**Performance metric:** EC50 values for nodes included in and excluded from the solution network. + +.. image:: ./_static/nc_ec50.png + :alt: Evaluation based on sensitivity to drug perturbation + :width: 1000px + +.. 
note:: + Methods producing result networks whose nodes have a low average EC50 (compared to nodes not included in the network) are better performers than those producing networks + where this difference (EC50_in - EC50_out) is less pronounced. + +**Example:** :doc:`Vignette 3: Sensitive response to drug perturbation using phosphoproteomics ` + +.. _eval-pathway: + +--------------------------- +Pathway enrichment analysis +--------------------------- + +**Data**: perturbational scenarios, dysregulation (e.g. cancer basal profiles), basal profiles (e.g. tissue-specific profiles). +See :ref:`PANACEA `. + +**Assumption**: In this setting, we use the nodes of the subnetworks to perform Overrepresentation Analysis against a set of predefined gene sets, among which we expect one to be especially represented +(for example, a specific pathway will be overrepresented if said pathway is perturbed, or is especially active/inactive in a given profile). + +**Performance metric:** rank of the selected gene set among all gene sets, according to ORA score. + +.. image:: ./_static/nc_pathway.png + :alt: Evaluation based on pathway enrichment + :width: 1000px + +.. note:: + Having preselected a gene set of interest, the methods producing networks in which said gene set is ranked high against the others, according to its ORA score, will perform better. + +**Example:** :doc:`Vignette 1: A simple example ` + +.. _eval-multiomics: + +-------------------------------- +Recovery of dysregulated kinases +-------------------------------- + +**Data**: perturbational scenarios, dysregulation (e.g. cancer basal profiles). +See :ref:`CPTAC `. + +**Assumption**: In this setting, we use three different types of omics data: + +* **Proteomics**: we identified the most differentially abundant receptors in the proteomics profiles between healthy and tumor samples. We assume that if they are differentially abundant, they will be activated/inhibited. 
+* **Transcriptomics**: we performed TF enrichment analysis, in order to get the TFs that are dysregulated in the tumor samples compared to the healthy control. +* **Phosphoproteomics**: we performed kinase activity estimation and then evaluated the level of dysregulation in the resulting subnetwork. + +**Performance metric:** difference between kinase activity score in the solution network and the overall PKN. + +.. image:: ./_static/nc_multiomics.png + :alt: Evaluation based on recovery of dysregulated kinases + :width: 1000px + +.. note:: + Methods whose result subnetworks have a higher average kinase activity score, compared to the overall PKN, will be better performers. + +**Example:** :doc:`Vignette 4: Recovery of dysregulated kinases in response to cancer mutations ` + + diff --git a/docs/src/contents.rst b/docs/src/contents.rst index bc47ada..7b98e84 100644 --- a/docs/src/contents.rst +++ b/docs/src/contents.rst @@ -16,6 +16,7 @@ NetworkCommons: Table of Contents datasets methods + benchmarks .. toctree:: @@ -24,6 +25,7 @@ NetworkCommons: Table of Contents guidelines/guide_1_data guidelines/guide_2_methods + guidelines/guide_3_eval .. toctree:: diff --git a/docs/src/datasets.rst b/docs/src/datasets.rst index bd9d056..24da76e 100644 --- a/docs/src/datasets.rst +++ b/docs/src/datasets.rst @@ -2,13 +2,17 @@ Data #### NetworkCommons provides a collection of omics datasets and prior knowledge resources. The datasets are available in the form of files that can be downloaded and used for further analysis. The prior knowledge resources are available in the form of networks (either Network objects or pd.DataFrames). -All the data can be accessed via the NetworkCommons API. +All the data can be accessed via the NetworkCommons API. +If you want to contribute your own, please check our :doc:`Contribution guidelines `. + +.. _details-omics: ---------- Omics data ---------- Below, we provide a list of all the omics datasets currently available in NetworkCommons. 
For each data, we provide a link to the original publication, a description, processing (if applicable), and a link to the data location. +.. _details-decryptm: DecryptM -------- @@ -26,6 +30,7 @@ Networkcommons contains the files containing, per phosphosite, EC50 values obtai **Functions:** See API documentation for :ref:`DecryptM `. +.. _details-panacea: PANACEA ------- @@ -49,6 +54,7 @@ in `decoupler `_. **Functions:** See API documentation for :ref:`PANACEA `. +.. _details-cptac: CPTAC ----- @@ -67,6 +73,8 @@ can be found in the STAR Methods of `'Proteogenomic Data and Resources for Pan-C **Functions:** See API documentation for :ref:`CPTAC `. +.. _details-nci60: + NCI60 ----- @@ -82,11 +90,15 @@ NCI60 **Functions:** See API documentation for :ref:`NCI60 `. +.. _details-pk: + --------------- Prior Knowledge --------------- Below, we provide a list of all the prior knowledge resources currently available in NetworkCommons. For each resource, we provide a description and a link to the original publication. +.. _details-omnipath: + OmniPath -------- @@ -101,6 +113,8 @@ Our aim is to expand the API to more data sources within OmniPath. For more info **Functions:** See API documentation for :ref:`Prior knowledge `. +.. _details-liana: + Liana ----- @@ -114,6 +128,8 @@ Liana **Functions:** See API documentation for :ref:`Prior knowledge `. +.. _details-phosphositeplus: + PhosphositePlus --------------- diff --git a/docs/src/guidelines/guide_1_data.ipynb b/docs/src/guidelines/guide_1_data.ipynb index b167b70..dd4ed2b 100644 --- a/docs/src/guidelines/guide_1_data.ipynb +++ b/docs/src/guidelines/guide_1_data.ipynb @@ -11,7 +11,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Thank you very much for considering contributing to the data collection of **NetworkCommons**! 
In order to make the resource as user-friendly as possible, we aim to be as transparent as possible, which means that all contributions should contain at least the following elements.\n", + "Thank you very much for considering contributing to the data collection of **NetworkCommons**! In order to make the resource as user-friendly as possible, we aim to be as transparent as possible, which means that all contributions should contain at least the following elements. For other examples, see [the Datasets details.](../datasets.html)\n", "\n", "## 1. Data information\n", "* Experimental design: number of samples, number of experiments (if applicable), confounding factors\n", diff --git a/docs/src/guidelines/guide_2_methods.ipynb b/docs/src/guidelines/guide_2_methods.ipynb deleted file mode 100644 index 7bf7d26..0000000 --- a/docs/src/guidelines/guide_2_methods.ipynb +++ /dev/null @@ -1,115 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Contribution's guideline: Methods" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Thank you very much for considering contributing to the methods collection of **NetworkCommons**! For methods, it is especially important that inputs and outputs are compatible with the rest of the package, the purpose is stated and the assumptions of the method are clear." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. API implementation" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* Every new method should be implemented in a separate file (e.g `_moon.py`) inside `/networkcommons/methods/`. 
\n", - "* Contributors can then implement their own set of functionalities and expose those necessary to the public API via the `__all__` variable (see other files for examples).\n", - "* The input of the overall pipeline must be at least a `Network` object, and its overall output should return at least a `Network` object containing the contextualised network. This does not apply to intermediate functions (e.g `Network` --function 1--> `pd.DataFrame` --function 2--> `Network`) in case of a pipeline containing several functions, such as MOON." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Documentation" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the `./docs/src/methods.rst` file, contributors should add:\n", - "\n", - "* The description of the method\n", - "* A figure showcasing the basics (if possible)\n", - "* Input/output definition\n", - "* Link to publication and repository (if available)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Functions should be documented using [Google style Python docstrings](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In `./docs/src/api.rst`, contributors should add a new documentation module that contains the new classes/functions implemented:" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - " New Method\n", - " ~~~~\n", - " .. module::networkcommons.methods\n", - " .. currentmodule:: networkcommons\n", - "\n", - " .. 
autosummary::\n", - " :toctree: api\n", - " :recursive:\n", - "\n", - " methods.run_shortest_paths\n", - " methods.run_sign_consistency\n", - " methods.run_reachability_filter\n", - " methods.run_all_paths\n", - " methods.compute_all_paths\n", - " methods.add_pagerank_scores\n", - " methods.compute_ppr_overlap" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "networkcommons-DX9y6Uxu-py3.10", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/src/guidelines/guide_2_methods.rst b/docs/src/guidelines/guide_2_methods.rst new file mode 100644 index 0000000..8a000a9 --- /dev/null +++ b/docs/src/guidelines/guide_2_methods.rst @@ -0,0 +1,35 @@ +################################# +Contribution's guideline: Methods +################################# + +Thank you very much for considering contributing to the methods collection of **NetworkCommons**! For methods, it is especially important that inputs and outputs are +compatible with the rest of the package, the purpose is stated and the assumptions of the method are clear. + + +---------------- +1. Documentation +---------------- + +In the :doc:`./docs/src/methods.rst file <../methods>`, contributors should add: + +* The description of the method +* A figure showcasing the basics (if possible) +* Input/output definition +* Link to publication and repository (if available) + +Functions should be documented using `Google style Python docstrings `_. + +In :doc:`./docs/src/api.rst file <../api>`, contributors should add a new documentation module that contains the new classes/functions implemented: + +.. 
literalinclude:: ../api.rst + :language: rest + :lines: 40-53 + +------ +2. API +------ + +* Every new method should be implemented in a separate file (e.g. ``_moon.py``) inside ``/networkcommons/methods/``. +* Contributors can then implement their own set of functionalities and expose those necessary to the public API via the ``__all__`` variable (see other files for examples). +* The input of the overall pipeline must be at least a ``Network`` object, and its overall output should return at least a ``Network`` object containing the contextualised network. +  This does not apply to intermediate functions (e.g. ``Network`` --function 1--> ``pd.DataFrame`` --function 2--> ``Network``) in case of a pipeline containing several functions, such as MOON. \ No newline at end of file diff --git a/docs/src/guidelines/guide_3_eval.rst b/docs/src/guidelines/guide_3_eval.rst new file mode 100644 index 0000000..0d1b6ec --- /dev/null +++ b/docs/src/guidelines/guide_3_eval.rst @@ -0,0 +1,37 @@ +############################################### +Contribution's guideline: Evaluation strategies +############################################### + +Thank you for considering contributing to the evaluation strategies. To implement a benchmark strategy, we need to clearly state the goals +and assumptions behind the strategy, define suitable datasets and define one (or more) performance metrics. For examples, see the existing :doc:`Evaluation strategies <../benchmarks>`. + +---------------- +1. Documentation +---------------- + +Each new benchmark strategy should cover the following points: + +* **Data:** which types of data/scenarios can be used for this strategy. +* **Assumption:** this is the most important part. Here, we define the idea behind the strategy; you can think of it as a small workflow draft. +* **Performance metric:** which metric we will use to rank the methods. 
+* **A note block**: here, contributors can explain in a nutshell how this evaluation metric can "differentiate" good and bad performers. It acts as a summary of the aforementioned points. + +For example: + +.. literalinclude:: ../benchmarks.rst + :language: rest + :lines: 69-88 + + +------ +2. API +------ + +* New strategies can be included in a separate file (e.g. ``_eval1.py``) inside the ``networkcommons.eval`` module. +* Contributors can then implement their own set of functionalities and expose those necessary to the public API via the ``__all__`` variable (see other files for examples). +* The input must be at least a ``Network`` or a dict of ``Network`` objects (``{'name1': Network1, 'name2': Network2, ...}``). The output can be anything, but ideally a ``pandas.DataFrame``, +  with a ``network`` column containing the network ID or name, and one or more columns for the implemented metric(s). + + + + diff --git a/docs/src/methods.rst b/docs/src/methods.rst index 53d8821..5b00bb7 100644 --- a/docs/src/methods.rst +++ b/docs/src/methods.rst @@ -2,6 +2,9 @@ Methods ####### +Here you can find a collection of the methods implemented in **NetworkCommons**, along with detailed descriptions. +If you want to contribute your own, please check our :doc:`Contribution guidelines `. 
+ ------------------- Topological methods ------------------- diff --git a/docs/src/vignettes/1_simple_example.ipynb b/docs/src/vignettes/1_simple_example.ipynb index 7211816..bf60276 100644 --- a/docs/src/vignettes/1_simple_example.ipynb +++ b/docs/src/vignettes/1_simple_example.ipynb @@ -525,7 +525,7 @@ "metadata": {}, "outputs": [], "source": [ - "decoupler_input = nc._utils.decoupler_formatter(results, 'stat')\n", + "decoupler_input = nc.utils.decoupler_formatter(results, 'stat')\n", "collectri_net = dc.get_collectri()\n", "dc_estimates, dc_pvals = dc.run_ulm(decoupler_input, collectri_net)" ] diff --git a/networkcommons/__init__.py b/networkcommons/__init__.py index 6f1c9df..95a5ba1 100644 --- a/networkcommons/__init__.py +++ b/networkcommons/__init__.py @@ -39,7 +39,7 @@ from ._session import log, _log, session from ._conf import config, setup -from . import _utils as utils +from . import utils as utils _MODULES = [ diff --git a/networkcommons/data/network/_moon.py b/networkcommons/data/network/_moon.py index e5c30b2..4807607 100644 --- a/networkcommons/data/network/_moon.py +++ b/networkcommons/data/network/_moon.py @@ -23,7 +23,7 @@ import numpy as np import pandas as pd -from networkcommons import _utils +from networkcommons import utils from . import _omnipath from . import _liana diff --git a/networkcommons/eval/_metrics.py b/networkcommons/eval/_metrics.py index 47ae5dd..c13346f 100644 --- a/networkcommons/eval/_metrics.py +++ b/networkcommons/eval/_metrics.py @@ -41,7 +41,7 @@ import decoupler as dc import numpy as np -import networkcommons._utils as utils +import networkcommons.utils as utils import random diff --git a/networkcommons/methods/_causal.py b/networkcommons/methods/_causal.py index 173ee15..7db2e14 100644 --- a/networkcommons/methods/_causal.py +++ b/networkcommons/methods/_causal.py @@ -30,7 +30,7 @@ import corneto as cn import corneto.contrib.networkx as cn_nx -from .. import _utils +from .. 
import utils def run_corneto_carnival(network, @@ -53,7 +53,7 @@ def run_corneto_carnival(network, nx.Graph: The subnetwork containing the paths found by CARNIVAL. list: A list containing the paths found by CARNIVAL. """ - corneto_net = _utils.to_cornetograph(network) + corneto_net = utils.to_cornetograph(network) problem, graph = cn.methods.runVanillaCarnival( perturbations=source_dict, @@ -68,7 +68,7 @@ def run_corneto_carnival(network, cn.methods.carnival.get_selected_edges(problem, graph), ) - network_nx = _utils.to_networkx(network_sol, skip_unsupported_edges=True) + network_nx = utils.to_networkx(network_sol, skip_unsupported_edges=True) network_nx.remove_nodes_from(['_s', '_pert_c0', '_meas_c0']) diff --git a/networkcommons/methods/_graph.py b/networkcommons/methods/_graph.py index 85ae249..94af139 100644 --- a/networkcommons/methods/_graph.py +++ b/networkcommons/methods/_graph.py @@ -32,7 +32,7 @@ import networkx as nx import numpy as np -from networkcommons import _utils +from networkcommons import utils from networkcommons._session import session as _session from collections import defaultdict, Counter @@ -75,7 +75,7 @@ def run_shortest_paths(network, source_dict, target_dict, verbose=False): # _session.log_traceback(console = verbose) pass - subnetwork = _utils.get_subnetwork(network, shortest_paths_res) + subnetwork = utils.get_subnetwork(network, shortest_paths_res) return subnetwork, shortest_paths_res @@ -148,7 +148,7 @@ def run_sign_consistency(network, paths, source_dict, target_dict=None): if np.sign(source_sign * product_sign) == np.sign(target_sign): sign_consistency_res.append(path) - subnetwork = _utils.get_subnetwork(network, sign_consistency_res) + subnetwork = utils.get_subnetwork(network, sign_consistency_res) if not target_dict: return subnetwork, sign_consistency_res, inferred_target_sign @@ -215,7 +215,7 @@ def run_all_paths(network, # _session.log_traceback(console = verbose) pass - subnetwork = _utils.get_subnetwork(network, 
all_paths_res) + subnetwork = utils.get_subnetwork(network, all_paths_res) return subnetwork, all_paths_res diff --git a/networkcommons/network/_network.py b/networkcommons/network/_network.py index c5e890a..b80d813 100644 --- a/networkcommons/network/_network.py +++ b/networkcommons/network/_network.py @@ -33,7 +33,7 @@ from networkcommons.data import _network as _universe from networkcommons.noi._noi import Noi -from networkcommons import _utils +from networkcommons import utils class Network: @@ -92,21 +92,21 @@ def _from_corneto(self): def _from_networkx(self): - self._co = _utils.to_cornetograph(self.universe) + self._co = utils.to_cornetograph(self.universe) self._attrs_from_corneto() def _from_pandas(self): - nxgraph = _utils.network_from_df(self.universe) - self._co = _utils.to_cornetograph(nxgraph) + nxgraph = utils.network_from_df(self.universe) + self._co = utils.to_cornetograph(nxgraph) self._attrs_from_corneto() def _attrs_from_corneto(self): - self._nodes = _utils.node_attrs_from_corneto(self._co) - self._edges = _utils.edge_attrs_from_corneto(self._co) + self._nodes = utils.node_attrs_from_corneto(self._co) + self._edges = utils.edge_attrs_from_corneto(self._co) def as_igraph(self, attrs: str | list[str]) -> "igraph.Graph": diff --git a/networkcommons/_utils.py b/networkcommons/utils.py similarity index 100% rename from networkcommons/_utils.py rename to networkcommons/utils.py diff --git a/networkcommons/visual/_rnaseq.py b/networkcommons/visual/_rnaseq.py index 3ef1cf6..1c01ad8 100644 --- a/networkcommons/visual/_rnaseq.py +++ b/networkcommons/visual/_rnaseq.py @@ -38,7 +38,7 @@ import matplotlib.pyplot as plt import seaborn as sns from sklearn import decomposition as sklearn_decomp -from networkcommons._utils import handle_missing_values +from networkcommons.utils import handle_missing_values def plot_density(df, diff --git a/tests/test_utils.py b/tests/test_utils.py index 1b53667..2982259 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py 
@@ -4,7 +4,7 @@ import corneto as cn from unittest.mock import patch import pytest -import networkcommons._utils as utils +import networkcommons.utils as utils import pygraphviz as pgv @@ -107,7 +107,7 @@ def test_to_networkx_when_not_supported(): def test_read_network_from_file(): - with patch('pandas.read_csv') as mock_read_csv, patch('networkcommons._utils.network_from_df') as mock_network_from_df: + with patch('pandas.read_csv') as mock_read_csv, patch('networkcommons.utils.network_from_df') as mock_network_from_df: mock_read_csv.return_value = pd.DataFrame({'source': ['a'], 'target': ['b']}) utils.read_network_from_file('dummy_path') mock_network_from_df.assert_called_once()