diff --git a/.gitignore b/.gitignore index e89c64f..92b50e0 100644 --- a/.gitignore +++ b/.gitignore @@ -35,4 +35,4 @@ api report* trace* work/ -*.png +test.ipynb diff --git a/docs/src/_static/ap.png b/docs/src/_static/ap.png new file mode 100644 index 0000000..33d7277 Binary files /dev/null and b/docs/src/_static/ap.png differ diff --git a/docs/src/_static/ppr.png b/docs/src/_static/ppr.png new file mode 100644 index 0000000..16fce71 Binary files /dev/null and b/docs/src/_static/ppr.png differ diff --git a/docs/src/_static/reach.png b/docs/src/_static/reach.png new file mode 100644 index 0000000..1d41bab Binary files /dev/null and b/docs/src/_static/reach.png differ diff --git a/docs/src/_static/sign.png b/docs/src/_static/sign.png new file mode 100644 index 0000000..9359ca2 Binary files /dev/null and b/docs/src/_static/sign.png differ diff --git a/docs/src/_static/sp.png b/docs/src/_static/sp.png new file mode 100644 index 0000000..1e48519 Binary files /dev/null and b/docs/src/_static/sp.png differ diff --git a/docs/src/api.rst b/docs/src/api.rst index b4d6a22..f5178b4 100644 --- a/docs/src/api.rst +++ b/docs/src/api.rst @@ -60,9 +60,6 @@ CORNETO :recursive: methods.run_corneto_carnival - methods.to_cornetograph - methods.to_networkx - Prior Knowledge @@ -120,7 +117,9 @@ PANACEA :toctree: api :recursive: - data.omics.panacea + data.omics.panacea_experiments + data.omics.panacea_datatypes + data.omics.panacea_tables scPerturb ~~~~~~~~ @@ -164,17 +163,6 @@ NCI60 data.omics.nci60_table -Other -~~~~~~~~ -.. module::networkcommons.data.omics -.. currentmodule:: networkcommons - -.. autosummary:: - :toctree: api - :recursive: - - data.omics.moon - Evaluation and description ========================== @@ -231,3 +219,28 @@ Visualization visual.plot_density visual.plot_scatter visual.plot_rank + + +Utilities +========= + +.. module::networkcommons.utils +.. currentmodule:: networkcommons + +.. 
autosummary:: + :toctree: api + :recursive: + + + utils.to_cornetograph + utils.to_networkx + utils.read_network_from_file + utils.network_from_df + utils.get_subnetwork + utils.decoupler_formatter + utils.targetlayer_formatter + utils.handle_missing_values + utils.subset_df_with_nodes + utils.node_attrs_from_corneto + utils.edge_attrs_from_corneto + diff --git a/docs/src/contents.rst b/docs/src/contents.rst index 4be9548..bc47ada 100644 --- a/docs/src/contents.rst +++ b/docs/src/contents.rst @@ -10,12 +10,20 @@ NetworkCommons: Table of Contents installation api +.. toctree:: + :maxdepth: 2 + :caption: Details + + datasets + methods + .. toctree:: :maxdepth: 2 :caption: Contribution guidelines guidelines/guide_1_data + guidelines/guide_2_methods .. toctree:: diff --git a/docs/src/datasets.rst b/docs/src/datasets.rst new file mode 100644 index 0000000..6d7c8f6 --- /dev/null +++ b/docs/src/datasets.rst @@ -0,0 +1,114 @@ +#### +Data +#### +NetworkCommons provides a collection of omics datasets and prior knowledge resources. The datasets are available in the form of files that can be downloaded and used for further analysis. The prior knowledge resources are available in the form of networks (either Network objects or pd.DataFrames). +All the data can be accessed via the NetworkCommons API. + +---------- +Omics data +---------- +Below, we provide a list of all the omics datasets currently available in NetworkCommons. For each data, we provide a link to the original publication, a description, processing (if applicable), and a link to the data location. + + +DecryptM +-------- + +**Alias:** decryptm + +**Description:** Drug perturbation proteomics and phosphoproteomics data + +**Publication Link:** `Jana Zecha et al. Decrypting drug actions and protein modifications by dose- and time-resolved proteomics. Science 380,93-101(2023). 
`_ + +**Data location:** `PRIDE `_ + +**Detailed Description:** This dataset contains the profiling of 31 cancer drugs in 13 human cancer cell line models, resulting in 1.8 million dose-response curves. The data includes 47,502 regulated phosphopeptides, 7316 ubiquitinylated peptides, and 546 regulated acetylated peptides. +Networkcommons contains the files containing, per phosphosite, EC50 values obtained from fitting the intensity values of the 10 drug concentration points to a four-parameter logistic function. + + +PANACEA +------- + +**Alias:** panacea + +**Description:** Pancancer Analysis of Chemical Entity Activity RNA-Seq data + +**Publication Link:** `Eugene F. Douglass et al. A community challenge for a pancancer drug mechanism of action inference from perturbational profile data. Cell Reports Medicine (2022). `_ + +**Data location:** `NCBI GEO `_ + +**Detailed Description:** PANACEA contains dose-response and perturbational profiles for 32 kinase inhibitors in 11 cancer cell lines, in addition to a DMSO control. Originally, this resource served as the basis for a DREAM Challenge assessing the accuracy and sensitivity of computational algorithms for de novo drug polypharmacology predictions. +NetworkCommons provides raw files for countdata and metadata, as retrieved in the original page. In addition, differential expression and TF activity tables are provided. + +**Data processing:** The differential expression statistics were obtained via `FLOP `_, using FilterbyExpr and DESeq2, one of the top performer combinations in the benchmarking study. +The contrasts were set, per cell line, between each drug and the DMSO control. The TF activity tables were obtained also via `FLOP `_, using univariate linear models as implemented +in `decoupler `_. + + +CPTAC +----- + +**Alias:** CPTAC + +**Description:** Clinical Proteomic Tumor Analysis Consortium data + +**Publication Link:** `Ellis, M. J. et al. 
Connecting genomic alterations to cancer biology with proteomics: the NCI Clinical Proteomic Tumor Analysis Consortium. Cancer Discov. 3, 1108–1112 (2013). `_ + +**Data location:** `NIH NCI Proteomic Data Commons `_ + +**Detailed Description:** This dataset contains data from the Clinical Proteomic Tumor Analysis Consortium. It includes various cancer types and proteomic data. +We included only the data processed by the University of Michigan team's pipeline, and then post-processed by the Baylor College of Medicine's pipeline. Details +can be found in the STAR Methods of `'Proteogenomic Data and Resources for Pan-Cancer Analysis' `_ (i.e., 'BCM pipeline for pan-cancer multi-omics data harmonization'). + + +NCI60 +----- + +**Alias:** NCI60 + +**Description:** NCI-60 cell line data + +**Publication Link:** `Shoemaker, R. The NCI60 human tumour cell line anticancer drug screen. Nat Rev Cancer 6, 813–823 (2006). `_ + +**Data location:** `COSMOS R package - Bioconductor `_ + +**Detailed Description:** This dataset contains data from the NCI-60 cell line panel. It includes three files: TF activities from transcriptomics data, metabolite abundances, and gene reads. + +--------------- +Prior Knowledge +--------------- +Below, we provide a list of all the prior knowledge resources currently available in NetworkCommons. For each resource, we provide a description and a link to the original publication. + +OmniPath +-------- + +**Alias:** omnipath + +**Description:** OmniPath database + +**Publication Link:** `Türei, D. et al. OmniPath: guidelines and gateway for literature-curated signaling pathway resources. Nat Methods 13, 966–967 (2016). `_ + +**Detailed Description:** OmniPath is a comprehensive collection of signaling pathways and regulatory interactions. Currently, NetworkCommons includes the signed and directed PPI network that can be obtained from Omnipath.Interactions. +Our aim is to expand the API to more data sources within OmniPath. 
For more information, please refer to the `OmniPath website `_ and the `OmniPath documentation page `_. + +Liana +----- + +**Alias:** liana + +**Description:** Liana database + +**Publication Link:** `Dimitrov, D., Türei, D., Garrido-Rodriguez, M. et al. Comparison of methods and resources for cell-cell communication inference from single-cell RNA-Seq data. Nat Commun 13, 3224 (2022). `_ + +**Detailed Description:** The Prior Knowledge from Liana contains ligand-receptor interactions. For more information, please refer to the `Liana documentation page `_. + +PhosphositePlus +--------------- + +**Alias:** phosphositeplus + +**Description:** PhosphositePlus database + +**Publication Link:** `Hornbeck, P. V. et al. PhosphoSitePlus, 2014: mutations, PTMs and recalibrations. Nucleic Acids Res 43, D512–D520 (2015). `_ + +**Detailed Description:** PhosphositePlus is a comprehensive resource that contains, among other PTM interactions, kinase-substrate interactions, which can be used to infer kinase activities from phosphoproteomics data. +For more information, please refer to the `PhosphositePlus website `_. \ No newline at end of file diff --git a/docs/src/guidelines/guide_1_data.ipynb b/docs/src/guidelines/guide_1_data.ipynb index 56b16a4..b167b70 100644 --- a/docs/src/guidelines/guide_1_data.ipynb +++ b/docs/src/guidelines/guide_1_data.ipynb @@ -29,8 +29,6 @@ "cell_type": "markdown", "metadata": {}, "source": [ - ".. code-block:: yaml\n", - "\n", " NCI60:\n", " name: NCI60\n", " description: NCI-60 cell line data\n", @@ -51,7 +49,16 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import networkcommons as nc" + ] + }, + { + "cell_type": "code", + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -97,31 +104,17 @@ " PANACEA contains dose-response and perturbational profiles for 32 kinase inhibitors in 11 cancer cell lines, in addition to a DMSO control. 
Originally, this resource served as the basis for a DREAM Challenge assessing the accuracy and sensitivity of computational algorithms for de novo drug polypharmacology predictions.\n", " \n", " \n", - " moon\n", - " MOON\n", - " Database files for running MOON\n", - " https://example.com/moon\n", - " This dataset contains database files required for running the MOON software.\n", - " \n", - " \n", - " cosmos\n", - " COSMOS\n", - " Database files for running COSMOS (MetaPKN)\n", - " https://example.com/cosmos\n", - " This dataset includes database files for the COSMOS software (MetaPKN).\n", - " \n", - " \n", " CPTAC\n", " CPTAC\n", " Clinical Proteomic Tumor Analysis Consortium data\n", - " https://example.com/CPTAC\n", + " https://doi.org/10.1158/2159-8290.CD-13-0219\n", " This dataset contains data from the Clinical Proteomic Tumor Analysis Consortium. It includes various cancer types and proteomic data.\n", " \n", " \n", " NCI60\n", " NCI60\n", " NCI-60 cell line data\n", - " https://example.com/NCI60\n", + " https://doi.org/10.1038/nrc1951\n", " This dataset contains data from the NCI-60 cell line panel. 
It includes three files: TF activities from transcriptomics data, metabolite abundances and gene reads.\n", " \n", " \n", @@ -132,37 +125,29 @@ " name \\\n", "decryptm DecryptM \n", "panacea Panacea \n", - "moon MOON \n", - "cosmos COSMOS \n", "CPTAC CPTAC \n", "NCI60 NCI60 \n", "\n", " description \\\n", "decryptm Drug perturbation proteomics and phosphoproteomics data \n", "panacea Pancancer Analysis of Chemical Entity Activity RNA-Seq data \n", - "moon Database files for running MOON \n", - "cosmos Database files for running COSMOS (MetaPKN) \n", "CPTAC Clinical Proteomic Tumor Analysis Consortium data \n", "NCI60 NCI-60 cell line data \n", "\n", - " publication_link \\\n", - "decryptm https://doi.org/10.1126/science.ade3925 \n", - "panacea https://doi.org/10.1016/j.xcrm.2021.100492 \n", - "moon https://example.com/moon \n", - "cosmos https://example.com/cosmos \n", - "CPTAC https://example.com/CPTAC \n", - "NCI60 https://example.com/NCI60 \n", + " publication_link \\\n", + "decryptm https://doi.org/10.1126/science.ade3925 \n", + "panacea https://doi.org/10.1016/j.xcrm.2021.100492 \n", + "CPTAC https://doi.org/10.1158/2159-8290.CD-13-0219 \n", + "NCI60 https://doi.org/10.1038/nrc1951 \n", "\n", " detailed_description \n", "decryptm This dataset contains the profiling of 31 cancer drugs in 13 human cancer cell line models resulted in 1.8 million dose-response curves, including 47,502 regulated phosphopeptides, 7316 ubiquitinylated peptides, and 546 regulated acetylated peptides. \n", "panacea PANACEA contains dose-response and perturbational profiles for 32 kinase inhibitors in 11 cancer cell lines, in addition to a DMSO control. Originally, this resource served as the basis for a DREAM Challenge assessing the accuracy and sensitivity of computational algorithms for de novo drug polypharmacology predictions. \n", - "moon This dataset contains database files required for running the MOON software. 
\n", - "cosmos This dataset includes database files for the COSMOS software (MetaPKN). \n", "CPTAC This dataset contains data from the Clinical Proteomic Tumor Analysis Consortium. It includes various cancer types and proteomic data. \n", "NCI60 This dataset contains data from the NCI-60 cell line panel. It includes three files: TF activities from transcriptomics data, metabolite abundances and gene reads. " ] }, - "execution_count": 21, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } diff --git a/docs/src/guidelines/guide_2_methods.ipynb b/docs/src/guidelines/guide_2_methods.ipynb new file mode 100644 index 0000000..7bf7d26 --- /dev/null +++ b/docs/src/guidelines/guide_2_methods.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Contribution's guideline: Methods" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Thank you very much for considering contributing to the methods collection of **NetworkCommons**! For methods, it is especially important that inputs and outputs are compatible with the rest of the package, the purpose is stated and the assumptions of the method are clear." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. API implementation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Every new method should be implemented in a separate file (e.g `_moon.py`) inside `/networkcommons/methods/`. \n", + "* Contributors can then implement their own set of functionalities and expose those necessary to the public API via the `__all__` variable (see other files for examples).\n", + "* The input of the overall pipeline must be at least a `Network` object, and its overall output should return at least a `Network` object containing the contextualised network. 
This does not apply to intermediate functions (e.g `Network` --function 1--> `pd.DataFrame` --function 2--> `Network`) in case of a pipeline containing several functions, such as MOON." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Documentation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the `./docs/src/methods.rst` file, contributors should add:\n", + "\n", + "* The description of the method\n", + "* A figure showcasing the basics (if possible)\n", + "* Input/output definition\n", + "* Link to publication and repository (if available)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Functions should be documented using [Google style Python docstrings](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In `./docs/src/api.rst`, contributors should add a new documentation module that contains the new classes/functions implemented:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " New Method\n", + " ~~~~\n", + " .. module::networkcommons.methods\n", + " .. currentmodule:: networkcommons\n", + "\n", + " .. 
autosummary::\n", + " :toctree: api\n", + " :recursive:\n", + "\n", + " methods.run_shortest_paths\n", + " methods.run_sign_consistency\n", + " methods.run_reachability_filter\n", + " methods.run_all_paths\n", + " methods.compute_all_paths\n", + " methods.add_pagerank_scores\n", + " methods.compute_ppr_overlap" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "networkcommons-DX9y6Uxu-py3.10", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/src/methods.rst b/docs/src/methods.rst new file mode 100644 index 0000000..795bc91 --- /dev/null +++ b/docs/src/methods.rst @@ -0,0 +1,121 @@ +####### +Methods +####### + +------------------- +Topological methods +------------------- + + +Shortest path +------------- + +The shortest path is an algorithm for finding one or multiple paths that minimize the distance from a set of starting nodes to a set of destination nodes in a weighted graph (https://doi.org/10.1007/BF01386390). + +.. image:: ./_static/sp.png + :alt: Shortest path + :width: 1000px + +**Input:** Set of source and target nodes, (weighted) network graph + +**Node weights:** w(v) = 1 + +**Edge weights:** 0 ≤ w(e) ≤ 1 + + +Sign consistency +---------------- + +The sign consistency method checks for sign consistency between the nodes in a given graph. Here, source and target nodes, as well as the edges in the graph, have an assigned sign. + +.. 
image:: ./_static/sign.png + :alt: Sign consistency + :width: 1000px + +**Input:** Set of source and target nodes (with a sign for up- or downregulation), network graph + +**Node weights:** w(v) ∈ {1, −1} + +**Edge weights:** w(e) ∈ {1, −1} + + +Reachability filter +------------------- + +The reachability filter generates a network consisting of all reachable nodes from a set of starting nodes. + +.. image:: ./_static/reach.png + :alt: Reachability filter + :width: 1000px + +**Input:** Set of source nodes, network graph + +**Node weights:** w(v) ∈ {1} + +**Edge weights:** w(e) ∈ {1} + + +All paths +--------- + +The all paths method finds all possible connections between a set of source nodes and a set of target nodes. In contrast to the shortest path method and the sign consistency method, it does not take the distance or any sign information into account, respectively. + +.. image:: ./_static/ap.png + :alt: All paths + :width: 1000px + +**Input:** Set of source and target nodes, network graph + +**Node weights:** w(v) ∈ {1} + +**Edge weights:** w(e) ∈ {1} + +-------------------------------------- +Random walk with restart (RWR) methods +-------------------------------------- + + +Page rank +--------- + +The Page rank algorithm initially calculates a weight for each node in a graph based on a random walk with restart method. It starts at a set of source or target nodes and determines the importance of the other nodes in the graph based on the structure of the incoming or outgoing edges. It then builds a network considering the highest-ranking nodes starting from each of the source and the target nodes. + +.. 
image:: ./_static/ppr.png + :alt: PPR + :width: 1000px + +**Input:** Set of source and target nodes, network graph + +**Node weights:** w(v) ∈ {1} + +**Edge weights:** w(e) ∈ {1} + +---------------------------- +Recursive enrichment methods +---------------------------- + +MOON +---- + +MOON (meta-footprint method) performs iterative footprint activity scoring and network diffusion from a set of target nodes to generate a sign consistent network (https://doi.org/10.1101/2024.07.15.603538). Starting from a set of weighted target nodes it calculates a weight for the next layer of upstream nodes using a univariate linear model. This process is repeated until a set of source nodes or a certain number of steps is reached. Hereby, any source node with an incoherent sign between MOON and the input sign is pruned out along with all incoming and outgoing edges. Additionally, edges between two inconsistent nodes are removed. + +**Input:** Set of weighted target nodes (and optionally weighted source nodes), network graph + +**Node weights:** w(v) ∈ ℝ + +**Edge weights:** w(e) ∈ ℝ + +----------------- +ILP-based methods +----------------- + +CORNETO - CARNIVAL +------------------ + +CORNETO (Constraint-based Optimization for the Reconstruction of NETworks from Omics) is a unified network inference method which combines a wide range of network methods including CARNIVAL which is currently implemented in NetworkCommons. CARNIVAL (CAusal Reasoning for Network identification using Integer VALue programming) connects a set of weighted target and source nodes using integer linear programming (ILP) and predicts the sign for the intermediate nodes (https://doi.org/10.1038/s41540-019-0118-z). Thereby, it optimizes a cost function that penalizes the inclusion of edges as well as the removal of target and source nodes. Additionally, it considers a set of constraints that among other things do not allow sign inconsistency. 
def panacea_experiments(update=True) -> pd.DataFrame:
    """
    Table describing the experiments (drug-cell combinations) contained
    in the Panacea dataset.

    Args:
        update:
            If true (the default), the metadata is downloaded again and
            the local pickle cache is overwritten. If false, the cache is
            read when it exists and the download only happens when it is
            missing.

    Returns:
        Data frame with all drug-cell line combinations.
    """

    path = os.path.join(_conf.get('pickle_dir'), 'panacea_exps.pickle')

    if update or not os.path.exists(path):

        # NOTE(review): `urljoin` replaces the last path segment of the base
        # when the base has no trailing slash -- confirm that
        # `_common._baseurl()` ends with "/".
        baseurl = urllib.parse.urljoin(_common._baseurl(), 'panacea')

        file_legend = pd.read_csv(baseurl + '/panacea__metadata.tsv', sep='\t')

        # `group` is split at "_" into cell line and drug; the two-column
        # assignment assumes exactly one underscore per label -- TODO confirm.
        file_legend[['cell', 'drug']] = file_legend['group'].str.split('_', expand=True)
        # Without the per-sample identifier, replicates of the same
        # experiment collapse into a single row.
        file_legend.drop(columns='sample_ID', inplace=True)
        file_legend.drop_duplicates(inplace=True)
        file_legend.reset_index(drop=True, inplace=True)

        file_legend.to_pickle(path)

    else:

        # The pickle is a local cache written by the branch above.
        file_legend = pd.read_pickle(path)

    return file_legend


def panacea_datatypes() -> pd.DataFrame:
    """
    Table describing the available data types in the Panacea dataset.

    Returns:
        Data frame with one row per data type and the columns `type`
        (the value accepted by `panacea_tables(type=...)`) and
        `description`.
    """

    return pd.DataFrame({
        'type': ['raw', 'diffexp', 'TF_scores'],
        'description': ['RNA-Seq raw counts and metadata containing sample, name, and group',
                        'Differential expression analysis with filterbyExpr+DESeq2',
                        'Transcription factor activity scores with CollecTRI + T-values'],
    })


def _panacea_single_value(value, label):
    """Reduce a one-element list/tuple/set to its element; reject other sizes."""

    if isinstance(value, (list, tuple, set)):
        values = list(value)
        if len(values) != 1:
            raise ValueError(
                f'Processed Panacea tables are stored per experiment: '
                f'expected exactly one {label}, got {len(values)}.'
            )
        return values[0]

    return value


def panacea_tables(cell_line=None, drug=None, type='raw'):
    """
    Tables from the Panacea dataset.

    With `type='raw'`, one table of count data and one table of metadata
    are returned, optionally restricted to the given cell line(s) and
    drug(s). With `type='diffexp'` or `type='TF_scores'`, the processed
    table of a single cell line-drug experiment is returned.

    Args:
        cell_line:
            Name of the cell line(s). For a complete list see
            `panacea_experiments()`. A string or a collection of strings
            for raw data; exactly one cell line for processed data.
        drug:
            Name of the drug(s). For a complete list see
            `panacea_experiments()`. A string or a collection of strings
            for raw data; exactly one drug for processed data.
        type:
            Type of data. For a complete list see `panacea_datatypes()`.

    Returns:
        For raw data, a tuple of two data frames: counts (the
        `gene_symbol` column plus one column per selected sample) and
        metadata. For processed data, a single data frame.

    Raises:
        ValueError: If `type` is unknown, if cell line or drug is missing
            for a processed data type, or if more than one cell line or
            drug is given for a processed data type.
    """

    # Check the data type first, so a misspelled type is reported as such
    # and not as a missing cell line/drug.
    if type not in ('raw', 'diffexp', 'TF_scores'):
        raise ValueError(f'Unknown data type: {type}.')

    if type == 'raw':

        df_meta = _common._open(
            _common._commons_url('panacea', table='metadata'),
            df={'sep': '\t'},
        )

        df_meta[['cell', 'drug']] = df_meta['group'].str.split('_', expand=True)

        if isinstance(cell_line, str):
            cell_line = [cell_line]

        if isinstance(drug, str):
            drug = [drug]

        if cell_line is not None:
            df_meta = df_meta[df_meta['cell'].isin(cell_line)]

        if drug is not None:
            df_meta = df_meta[df_meta['drug'].isin(drug)]

        df_count = _common._open(
            _common._commons_url('panacea', table='countdata'),
            df={'sep': '\t'},
        )

        # Keep only the count columns of the samples left in the metadata.
        subset_cols = df_meta['sample_ID'].tolist()
        df_count = df_count.loc[:, ['gene_symbol'] + subset_cols]

        return df_count, df_meta

    if cell_line is None or drug is None:
        raise ValueError('Please specify cell line and drug.')

    # One processed file exists per experiment; a list interpolated into the
    # file name would yield a nonsensical URL, so fail before any download.
    cell_line = _panacea_single_value(cell_line, 'cell line')
    drug = _panacea_single_value(drug, 'drug')

    # NOTE(review): same trailing-slash caveat for `urljoin` as in
    # `panacea_experiments` -- confirm the shape of `_common._baseurl()`.
    baseurl = urllib.parse.urljoin(_common._baseurl(), 'panacea/processed')

    proc_file = pd.read_csv(baseurl + f'/{cell_line}_{drug}__{type}.tsv', sep='\t')

    return proc_file