Merge branch 'add-brain-filter-protein-list'

MaayanLab · Oct 20, 2023 · fa7448a · fa7448a
2 parents 8572f20 + 828dc33
commit fa7448a
Show file tree

Hide file tree

Showing 2 changed files with 121 additions and 5 deletions.
diff --git a/appyters/Tumor_Gene_Target_Screener/Tumor_Gene_Target_Screener.ipynb b/appyters/Tumor_Gene_Target_Screener/Tumor_Gene_Target_Screener.ipynb
@@ -244,14 +244,52 @@
     "    section='primary',\n",
     ") %}\n",
     "\n",
+    "{% set filter_brain = BoolField(\n",
+    "    name='filter_brain',\n",
+    "    label='Filter out brain related tissues from the background',\n",
+    "    default=False,\n",
+    "    section='primary',\n",
+    ") %}\n",
+    "\n",
+    "\n",
+    "\n",
     "{% set proteomics_vis_ = BoolField(\n",
     "    name='proteomics_vis',\n",
     "    label='Show protein expression profiles of gene candidates',\n",
     "    description='View protein expression levels in normal tissues, from Human Proteome Map and Human Protein Atlas proteomics data. Only human currently supported.',\n",
     "    default=True,\n",
     "    section='primary',\n",
     ") %}\n",
-    "{% set proteomics_vis = background_organism == '\"Mammalia/Homo_sapiens\"' and proteomics_vis_.raw_value %}"
+    "\n",
+    "{% set proteomics_vis = background_organism == '\"Mammalia/Homo_sapiens\"' and proteomics_vis_.raw_value %}\n",
+    "\n",
+    "{% set use_protein_list_ = TabField(\n",
+    "    name='use_protein_list',\n",
+    "    label='Prioritize list of highly expressed proteins',\n",
+    "    description='Prioritize a list of proteins highly expressed in the uploaded samples',\n",
+    "    default='No',\n",
+    "    section='primary',\n",
+    "    choices={\n",
+    "     'Yes': [\n",
+    "    FileField(\n",
+    "        name='proteins_file',\n",
+    "        label='Protein list',\n",
+    "        description='A newline separated list of significantly highly expressed proteins in the uploaded samples',\n",
+    "        default='proteins_example.txt',\n",
+    "        required=True,\n",
+    "        examples={\n",
+    "            'proteins_example.txt': 'https://appyters.maayanlab.cloud/storage/Tumor_Gene_Target_Screener/proteins_example.txt',\n",
+    "        },\n",
+    "        section='primary',\n",
+    "    ),\n",
+    "    ],\n",
+    "    'No': [\n",
+    "    ]\n",
+    "    }\n",
+    ") %}\n",
+    "\n",
+    "\n",
+    "{% set use_protein_list = use_protein_list_.raw_value == 'Yes' %}"
    ]
   },
   {
@@ -273,6 +311,7 @@
    "source": [
     "%%appyter code_exec\n",
     "import os\n",
+    "import re\n",
     "import qnorm\n",
     "import numpy as np\n",
     "import pandas as pd\n",
@@ -364,6 +403,36 @@
     "df_expr"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "214644e6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%appyter markdown\n",
+    "{% if use_protein_list %}\n",
+    "## Protein list\n",
+    "A list of proteins highly expressed in the uploaded samples was provided. These proteins will be prioritized in the final result of the analysis.\n",
+    "{% endif %}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9f2e6e2b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%appyter code_exec\n",
+    "{% if use_protein_list %}\n",
+    "protein_list_filename = {{ use_protein_list_.value[0] }}\n",
+    "with open(protein_list_filename, 'r') as fr:\n",
+    "    lines = fr.readlines()\n",
+    "    proteins = [x.strip() for x in lines]\n",
+    "{% endif %}"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "ebcb9d0b",
@@ -394,6 +463,37 @@
     "df_bg_expr"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7cbe4b90",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%appyter markdown\n",
+    "{% if filter_brain.raw_value %}\n",
+    "## Include target genes that are only highly expressed in brain related cells and tissues\n",
+    "By removing brain related tissues and cell types from the normal backgrounds, we can identify genes that are \n",
+    "highly expressed only in the brain and in non-brain solid tumors, while lowly expressed in all other non-brain \n",
+    "normal tissues and cell types. The idea behind this filter is that the blood brain barrier (BBB) can serve as a \n",
+    "protective layer from therapeutic agents aimed at targeting genes that are highly expressed in tumors.\n",
+    "{% endif %}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "15eb496b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%appyter code_eval\n",
+    "{% if filter_brain.raw_value %}\n",
+    "brainvar = \"brain|cerebral|hippocampus|amygdala|neocortex|prefrontal|thalamus|cerebellum|pons|hypothalamic|hypothalamus|caudate|intracranial|cortical|dlpfc|cerebellar|basal ganglia\"\n",
+    "df_bg_expr = df_bg_expr[df_bg_expr.columns.drop(list(df_bg_expr.filter(regex=re.compile(brainvar, re.IGNORECASE))))]\n",
+    "{% endif %}"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -736,6 +836,7 @@
     "dge['is_deg'] = dge['adj.P.Val'] < 0.05\n",
     "dge['is_significant'] = prod > prod.mean() + 3 * prod.std()\n",
     "dge['score'] = dge['is_deg'].astype(int) + dge['is_significant'].astype(int)\n",
+    "\n",
     "#\n",
     "fig = go.Figure()\n",
     "fig.add_trace(go.Scattergl(\n",
@@ -847,7 +948,11 @@
    "outputs": [],
    "source": [
     "%%appyter code_exec\n",
-    "dge_final = dge[dge.score >= 1].sort_values(['score', '-log(adj.P.Val)'], ascending=False).iloc[:16]\n",
+    "{% if use_protein_list %}\n",
+    "dge['in_protein_list'] = np.in1d(dge['gene_symbol'], proteins)\n",
+    "dge['score'] = dge['score'] + dge['in_protein_list'].astype(int)\n",
+    "{% endif %}\n",
+    "dge_final = dge[dge.score >= 1].sort_values(['score', 't'], ascending=False).iloc[:16]\n",
     "pd.set_option('display.max_colwidth', None)\n",
     "dge_final['Link'] = dge_final['gene_symbol'].map(lambda g: f\"<a href=\\\"https://cfde-gene-pages.cloud/gene/{g}\\\">{g}</a>\")\n",
     "{% if membrane_screener %}\n",
@@ -859,6 +964,9 @@
     "    'is_deg',\n",
     "    'is_significant',\n",
     "    'is_membrane',\n",
+    "{% if use_protein_list %}\n",
+    "    'in_protein_list',\n",
+    "{% endif %}\n",
     "    'score',\n",
     "    'Link',\n",
     "]].to_html(notebook=True, escape=False)))\n",
@@ -870,6 +978,9 @@
     "    'adj.P.Val',\n",
     "    'is_deg',\n",
     "    'is_significant',\n",
+    "{% if use_protein_list %}\n",
+    "    'in_protein_list',\n",
+    "{% endif %}\n",
     "    'score',\n",
     "    'Link',\n",
     "]].to_html(notebook=True, escape=False)))\n",
@@ -1122,7 +1233,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.15"
+   "version": "3.9.17"
   },
   "vscode": {
    "interpreter": {

diff --git a/appyters/Tumor_Gene_Target_Screener/appyter.json b/appyters/Tumor_Gene_Target_Screener/appyter.json
@@ -2,7 +2,7 @@
   "$schema": "https://raw.githubusercontent.com/MaayanLab/appyter-catalog/main/schema/appyter-validator.json",
   "name": "Tumor_Gene_Target_Screener",
   "title": "Tumor Gene Target Screener",
-  "version": "0.3.9",
+  "version": "0.4.0",
   "description": "An appyter for gene target screening of tumors",
   "authors": [
     {
@@ -16,7 +16,12 @@
     {
       "name": "Reid H. Fleishman",
       "email": "reidfleishman5@gmail.com"
+    },
+    {
+      "name": "Giacomo B. Marino",
+      "email": "giacomobmarino@gmail.com"
     }
+
   ],
   "image": "thumbnail.png",
   "url": "https://github.com/maayanLab/appyter-catalog",
@@ -25,7 +30,7 @@
     "Differential Expression Analysis"
   ],
   "license": "CC-BY-NC-SA-4.0",
-  "public": false,
+  "public": true,
   "appyter": {
     "file": "Tumor_Gene_Target_Screener.ipynb",
     "profile": "biojupies",