diff --git a/docs/tutorial/finding-genesets.ipynb b/docs/tutorial/finding-genesets.ipynb index bf9fb39..94fef06 100644 --- a/docs/tutorial/finding-genesets.ipynb +++ b/docs/tutorial/finding-genesets.ipynb @@ -3,16 +3,105 @@ { "metadata": {}, "cell_type": "markdown", - "source": "## Dependencies and Set Up", + "source": "# Finding Genesets", + "id": "42133665392f8eb5" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-06-05T17:17:07.808121Z", + "start_time": "2024-06-05T17:17:07.758773Z" + } + }, + "cell_type": "code", + "source": "import requests", + "id": "e58b15d46f719712", + "outputs": [], + "execution_count": 1 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-06-05T17:17:09.533629Z", + "start_time": "2024-06-05T17:17:09.067666Z" + } + }, + "cell_type": "code", + "source": [ + "result = requests.get(\n", + " \"https://geneweaver.jax.org/api/genesets\",\n", + " params={\n", + " \"search_text\": \"cancer\",\n", + " \"limit\": 10\n", + " },\n", + ")\n", + "result.ok" + ], + "id": "acf259fea13fbd36", + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 2 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-06-05T17:17:10.334540Z", + "start_time": "2024-06-05T17:17:10.331734Z" + } + }, + "cell_type": "code", + "source": [ + "cancer_genesets = result.json()[\"data\"]\n", + "for i in cancer_genesets:\n", + " print(f\"GS{i['id']} has {i['count']} genes, is Tier-{i['curation_id']}, and is named {i['name']}\")" + ], + "id": "2a009795964dae3a", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GS83602 has 309 genes, is Tier-3, and is named Colorectal Cancer\n", + "GS243526 has 4 genes, is Tier-2, and is named [MeSH] Parotid Neoplasms : D010307\n", + "GS241454 has 1188 genes, is Tier-2, and is named [MeSH] Prostatic Neoplasms : D011471\n", + "GS241717 has 60 genes, is Tier-2, and is named [MeSH] Salivary Gland Neoplasms : D012468\n", + "GS242761 has 52 genes, is Tier-2, and is named [MeSH] Gallbladder Neoplasms : D005706\n", + "GS236367 has 3 genes, is Tier-2, and is named [MeSH] Cecal Neoplasms : D002430\n", + "GS238254 has 89 genes, is Tier-2, and is named [MeSH] Rectal Neoplasms : D012004\n", + "GS166984 has 5 genes, is Tier-1, and is named MP:0010337 increased chronic lymphocytic leukemia incidence\n", + "GS235575 has 120 genes, is Tier-2, and is named [MeSH] Genes, BRCA1 : D019398\n", + "GS242457 has 51 genes, is Tier-2, and is named [MeSH] Adenocarcinoma, Clear Cell : D018262\n" + ] + } + ], + "execution_count": 3 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## Finding Private Genesets\n", + "### Dependencies and Set Up" + ], "id": "d6d91fc15ac330d8" }, { "metadata": {}, "cell_type": "code", - "outputs": [], - "execution_count": null, "source": "!pip install geneweaver-client", - "id": "d932df03b02fc0e8" + "id": "d932df03b02fc0e8", + "outputs": [], + "execution_count": null }, { "cell_type": "code", @@ -20,19 +109,19 @@ "metadata": { "collapsed": true, "ExecuteTime": { - "end_time": "2024-05-28T13:24:21.132102Z", - "start_time": "2024-05-28T13:24:20.315733Z" + "end_time": "2024-06-05T17:17:23.933360Z", + "start_time": "2024-06-05T17:17:23.752049Z" } }, "source": "from geneweaver.client.auth import login", "outputs": [], - "execution_count": 1 + "execution_count": 5 }, { "metadata": { "ExecuteTime": { - "end_time": "2024-05-28T14:15:34.037708Z", - "start_time": "2024-05-28T14:15:14.743965Z" + "end_time": "2024-06-05T17:17:36.621812Z", + "start_time": "2024-06-05T17:17:24.448275Z" } }, "cell_type": "code", @@ -43,27 +132,39 @@ "name": "stdout", "output_type": "stream", "text": [ - "1. On your computer or mobile device navigate to: https://geneweaver.auth0.com/activate?user_code=VPPC-FWSQ\n", - "2. Enter the following code: VPPC-FWSQ\n", + "1. On your computer or mobile device navigate to: https://geneweaver.auth0.com/activate?user_code=JTCP-ZDCS\n", + "2. Enter the following code: JTCP-ZDCS\n", "Authenticated!\n", "- Id Token: eyJhbGciOi...\n" ] } ], - "execution_count": 2 + "execution_count": 6 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "", + "id": "f40bce34b226ecf" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "", + "id": "58280debf8ea8103" }, { "metadata": { "ExecuteTime": { - "end_time": "2024-05-28T14:18:35.250326Z", - "start_time": "2024-05-28T14:18:35.224788Z" + "end_time": "2024-06-05T17:17:39.111223Z", + "start_time": "2024-06-05T17:17:39.105489Z" } }, "cell_type": "code", "source": "from geneweaver.client.auth import get_access_token", "id": "9a222f2e97f34647", "outputs": [], - "execution_count": 4 + "execution_count": 7 }, { "metadata": {}, @@ -74,21 +175,21 @@ { "metadata": { "ExecuteTime": { - "end_time": "2024-05-28T14:19:16.611744Z", - "start_time": "2024-05-28T14:19:16.598106Z" + "end_time": "2024-06-05T17:17:40.443713Z", + "start_time": "2024-06-05T17:17:40.441797Z" } }, "cell_type": "code", "source": "import requests", "id": "5fed1b8b6ec94259", "outputs": [], - "execution_count": 6 + "execution_count": 8 }, { "metadata": { "ExecuteTime": { - "end_time": "2024-05-28T14:27:23.944721Z", - "start_time": "2024-05-28T14:27:22.740834Z" + "end_time": "2024-06-05T17:17:42.139832Z", + "start_time": "2024-06-05T17:17:40.815160Z" } }, "cell_type": "code", @@ -97,7 +198,6 @@ " \"https://geneweaver.jax.org/api/genesets\",\n", " params={\n", " \"search_text\": \"cancer\",\n", - " \"curation_tier\": \"Tier I\",\n", " \"limit\": 10\n", " },\n", " headers={\"Authorization\": f\"Bearer {get_access_token()}\"}\n", @@ -112,37 +212,25 @@ "True" ] }, - "execution_count": 56, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], - "execution_count": 56 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2024-05-28T14:27:24.698625Z", - "start_time": "2024-05-28T14:27:24.696627Z" - } - }, - "cell_type": "code", - "source": "cancer_genesets = result.json()[\"data\"]", - "id": "8a0a7ef040973dfe", - "outputs": [], - "execution_count": 57 + "execution_count": 9 }, { "metadata": { "ExecuteTime": { - "end_time": "2024-05-28T14:27:25.889802Z", - "start_time": "2024-05-28T14:27:25.887410Z" + "end_time": "2024-06-05T17:17:42.143610Z", + "start_time": "2024-06-05T17:17:42.141003Z" } }, "cell_type": "code", "source": [ + "cancer_genesets = result.json()[\"data\"]\n", "for i in cancer_genesets:\n", - " print(i[\"name\"])" + " print(f\"GS{i['id']} has {i['count']} genes, is Tier-{i['curation_id']}, and is named {i['name']}\")" ], "id": "b40c7fce9c826e32", "outputs": [ @@ -150,20 +238,20 @@ "name": "stdout", "output_type": "stream", "text": [ - "MP:0010337 increased chronic lymphocytic leukemia incidence\n", - "MSigDB Geneset - MORF_ZNF10\n", - "MSigDB Geneset - KRAS.LUNG.BREAST_UP.V1_UP\n", - "MSigDB Geneset - MORF_HDAC2\n", - "MSigDB Geneset - GCM_PTPRD\n", - "GWAS Catalog Data for breast carcinoma in 10,052 European ancestry cases, 12,575 European ancestry controls\n", - "MSigDB Geneset - MORF_TFDP2\n", - "GWAS Catalog Data for response to platinum based chemotherapy, non-small cell lung carcinoma in 327 European ancestry individuals\n", - "MSigDB Geneset - MORF_PML\n", - "MSigDB Geneset - GCM_ERCC4\n" + "GS83602 has 309 genes, is Tier-3, and is named Colorectal Cancer\n", + "GS355610 has 1751 genes, is Tier-5, and is named Evading Growth Suppressors\n", + "GS355050 has 31 genes, is Tier-5, and is named cancer gene transcripts_1_Pozhitkov2018\n", + "GS243526 has 4 genes, is Tier-2, and is named [MeSH] Parotid Neoplasms : D010307\n", + "GS216191 has 135 genes, is Tier-5, and is named 15\n", + "GS355058 has 31 genes, is Tier-5, and is named cancer gene transcripts_6_Pozhitkov2018\n", + "GS250872 has 56 genes, is Tier-None, and is named bmv77\n", + "GS241454 has 1188 genes, is Tier-2, and is named [MeSH] Prostatic Neoplasms : D011471\n", + "GS241717 has 60 genes, is Tier-2, and is named [MeSH] Salivary Gland Neoplasms : D012468\n", + "GS242761 has 52 genes, is Tier-2, and is named [MeSH] Gallbladder Neoplasms : D005706\n" ] } ], - "execution_count": 58 + "execution_count": 10 }, { "metadata": {}, @@ -174,8 +262,8 @@ { "metadata": { "ExecuteTime": { - "end_time": "2024-05-28T14:28:38.858820Z", - "start_time": "2024-05-28T14:28:37.607796Z" + "end_time": "2024-06-05T17:17:43.550159Z", + "start_time": "2024-06-05T17:17:42.144490Z" } }, "cell_type": "code", @@ -184,7 +272,6 @@ " \"https://geneweaver.jax.org/api/genesets\",\n", " params={\n", " \"search_text\": \"gwas & cancer\",\n", - " \"curation_tier\": \"Tier I\",\n", " \"limit\": 10\n", " },\n", " headers={\"Authorization\": f\"Bearer {get_access_token()}\"}\n", @@ -199,70 +286,25 @@ "True" ] }, - "execution_count": 60, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], - "execution_count": 60 + "execution_count": 11 }, { "metadata": { "ExecuteTime": { - "end_time": "2024-05-28T14:28:38.861832Z", - "start_time": "2024-05-28T14:28:38.859816Z" - } - }, - "cell_type": "code", - "source": "gwas_cancer_genesets = result.json()[\"data\"]", - "id": "b173bc8938da9893", - "outputs": [], - "execution_count": 61 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2024-05-28T14:28:38.864681Z", - "start_time": "2024-05-28T14:28:38.862696Z" - } - }, - "cell_type": "code", - "source": [ - "for i in gwas_cancer_genesets:\n", - " print(i[\"name\"])" - ], - "id": "f4396390d053338b", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GWAS Catalog Data for breast carcinoma in 10,052 European ancestry cases, 12,575 European ancestry controls\n", - "GWAS Catalog Data for response to platinum based chemotherapy, non-small cell lung carcinoma in 327 European ancestry individuals\n", - "GWAS Catalog Data for lung carcinoma in 2,331 Han Chinese ancestry lung cancer cases, 1,006 Han Chinese ancestry non-cardia gastric cancer cases, 2,031 Han Chinese ancestry esophageal squamous-cell carcinoma cases, 4,006 Han Chinese ancestry controls\n", - "GWAS Catalog Data for lung carcinoma, squamous cell carcinoma, gastric carcinoma in 2,331 Han Chinese ancestry lung cancer cases, 1,006 Han Chinese ancestry non-cardia gastric cancer cases, 2,031 Han Chinese ancestry esophageal squamous-cell carcinoma cases, 4,006 Han Chinese ancestry controls\n", - "GWAS Catalog Data for estrogen-receptor negative breast cancer in 4,939 European ancestry cases, 14,352 European ancestry controls\n", - "GWAS Catalog Data for breast carcinoma in 899 European ancestry cases, 804 European ancestry controls\n", - "GWAS Catalog Data for breast carcinoma in 1,426 European ancestry cases, 1,301 European ancestry controls\n", - "GWAS Catalog Data for ovarian carcinoma in 1,817 European ancestry cases, 2,353 European ancestry controls\n", - "GWAS Catalog Data for breast carcinoma in 1,367 Sardinian cases, 1,658 Sardinian controls\n", - "GWAS Catalog Data for endometrial carcinoma in 4,907 European ancestry cases, 11,945 European ancestry controls\n" - ] - } - ], - "execution_count": 62 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2024-05-28T14:29:22.658230Z", - "start_time": "2024-05-28T14:29:22.655886Z" + "end_time": "2024-06-05T17:17:43.553859Z", + "start_time": "2024-06-05T17:17:43.551488Z" } }, "cell_type": "code", "source": [ - "for i in gwas_cancer_genesets:\n", - " print(f\"GS{i['id']} has {i['count']} genes\")" + "cancer_genesets = result.json()[\"data\"]\n", + "for i in cancer_genesets:\n", + " print(f\"GS{i['id']} has {i['count']} genes, is Tier-{i['curation_id']}, and is named \\n\\t {i['name']}\")" ], "id": "30e29e4ca07bfa24", "outputs": [ @@ -270,28 +312,38 @@ "name": "stdout", "output_type": "stream", "text": [ - "GS267835 has 98 genes\n", - "GS270261 has 1 genes\n", - "GS268187 has 3 genes\n", - "GS268186 has 3 genes\n", - "GS271027 has 30 genes\n", - "GS269914 has 1 genes\n", - "GS270401 has 7 genes\n", - "GS270122 has 2 genes\n", - "GS267760 has 2 genes\n", - "GS268492 has 8 genes\n" + "GS267835 has 98 genes, is Tier-1, and is named \n", + "\t GWAS Catalog Data for breast carcinoma in 10,052 European ancestry cases, 12,575 European ancestry controls\n", + "GS270261 has 1 genes, is Tier-1, and is named \n", + "\t GWAS Catalog Data for response to platinum based chemotherapy, non-small cell lung carcinoma in 327 European ancestry individuals\n", + "GS268187 has 3 genes, is Tier-1, and is named \n", + "\t GWAS Catalog Data for lung carcinoma in 2,331 Han Chinese ancestry lung cancer cases, 1,006 Han Chinese ancestry non-cardia gastric cancer cases, 2,031 Han Chinese ancestry esophageal squamous-cell carcinoma cases, 4,006 Han Chinese ancestry controls\n", + "GS268186 has 3 genes, is Tier-1, and is named \n", + "\t GWAS Catalog Data for lung carcinoma, squamous cell carcinoma, gastric carcinoma in 2,331 Han Chinese ancestry lung cancer cases, 1,006 Han Chinese ancestry non-cardia gastric cancer cases, 2,031 Han Chinese ancestry esophageal squamous-cell carcinoma cases, 4,006 Han Chinese ancestry controls\n", + "GS271027 has 30 genes, is Tier-1, and is named \n", + "\t GWAS Catalog Data for estrogen-receptor negative breast cancer in 4,939 European ancestry cases, 14,352 European ancestry controls\n", + "GS212622 has 2 genes, is Tier-5, and is named \n", + "\t Aggregated Genome-Wide Associations for 'Breast cancer (survival)' in Humans\n", + "GS269914 has 1 genes, is Tier-1, and is named \n", + "\t GWAS Catalog Data for breast carcinoma in 899 European ancestry cases, 804 European ancestry controls\n", + "GS270401 has 7 genes, is Tier-1, and is named \n", + "\t GWAS Catalog Data for breast carcinoma in 1,426 European ancestry cases, 1,301 European ancestry controls\n", + "GS270122 has 2 genes, is Tier-1, and is named \n", + "\t GWAS Catalog Data for ovarian carcinoma in 1,817 European ancestry cases, 2,353 European ancestry controls\n", + "GS212755 has 20 genes, is Tier-5, and is named \n", + "\t Aggregated Genome-Wide Associations for 'Lung cancer' in Humans\n" ] } ], - "execution_count": 66 + "execution_count": 12 }, { "metadata": {}, "cell_type": "code", - "outputs": [], - "execution_count": null, "source": "", - "id": "eeb496d3d034ca3a" + "id": "eeb496d3d034ca3a", + "outputs": [], + "execution_count": null } ], "metadata": {