From 6935d4f0f90a19266003e3ce5a727d92f1961b9e Mon Sep 17 00:00:00 2001 From: Alexander Berger Date: Tue, 2 Apr 2024 14:03:05 -0400 Subject: [PATCH] Adding 'open in colabl' button --- docs/tutorial/map-gene-ids.ipynb | 666 ++++++++++++++++--------------- 1 file changed, 335 insertions(+), 331 deletions(-) diff --git a/docs/tutorial/map-gene-ids.ipynb b/docs/tutorial/map-gene-ids.ipynb index 2ba89ef..6c9dee7 100644 --- a/docs/tutorial/map-gene-ids.ipynb +++ b/docs/tutorial/map-gene-ids.ipynb @@ -1,352 +1,356 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# Map Gene IDs\n", - "\n", - "This tutorial will show you how to directly call the GeneWeaver API to map Gene IDs within a species. For this example, we will map Gene Symbols to Ensemble Gene IDs." - ], - "metadata": { - "id": "IxAq_kYX5xZB" - } - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "9sNkibUK22so" - }, - "outputs": [], - "source": [ - "import requests" - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Initialize Gene IDs\n", - "First, you will need to initalize a list of the Gene IDs you want to map. Here, we load identifiers from a file.\n", - "\n", - "After the end of this step you will need to have a list of gene IDs as shown below." - ], - "metadata": { - "id": "qqSZt-Ef6HSP" - } - }, - { - "cell_type": "code", - "source": [ - "with open('gene_names.txt', 'r') as file:\n", - " file_content = file.read().strip()\n", - " ids = [id.strip('\"') for id in file_content.split(',')]" - ], - "metadata": { - "id": "dNjGLcYT3H0r" - }, - "execution_count": 2, - "outputs": [] + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Map Gene IDs\n", + "\n", + "This tutorial will show you how to directly call the GeneWeaver API to map Gene IDs within a species. For this example, we will map Gene Symbols to Ensemble Gene IDs.\n", + "\n", + "To get started, you can \n", + " \"Open\n", + "" + ], + "metadata": { + "id": "IxAq_kYX5xZB" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "9sNkibUK22so" + }, + "outputs": [], + "source": [ + "import requests" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Initialize Gene IDs\n", + "First, you will need to initalize a list of the Gene IDs you want to map. Here, we load identifiers from a file.\n", + "\n", + "After the end of this step you will need to have a list of gene IDs as shown below." + ], + "metadata": { + "id": "qqSZt-Ef6HSP" + } + }, + { + "cell_type": "code", + "source": [ + "with open('gene_names.txt', 'r') as file:\n", + " file_content = file.read().strip()\n", + " ids = [id.strip('\"') for id in file_content.split(',')]" + ], + "metadata": { + "id": "dNjGLcYT3H0r" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "len(ids)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "EQLB21_j5HG0", + "outputId": "65ef2d27-b0d7-447e-ab23-00dbc35a3172" + }, + "execution_count": 3, + "outputs": [ { - "cell_type": "code", - "source": [ - "len(ids)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "EQLB21_j5HG0", - "outputId": "65ef2d27-b0d7-447e-ab23-00dbc35a3172" - }, - "execution_count": 3, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "32285" - ] - }, - "metadata": {}, - "execution_count": 3 - } + "output_type": "execute_result", + "data": { + "text/plain": [ + "32285" ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "ids[:5]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "z7Hz1w8B5OO8", + "outputId": "943c118b-3e99-4290-9bbe-bf4367e46805" + }, + "execution_count": 4, + "outputs": [ { - "cell_type": "code", - "source": [ - "ids[:5]" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "z7Hz1w8B5OO8", - "outputId": "943c118b-3e99-4290-9bbe-bf4367e46805" - }, - "execution_count": 4, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "['Xkr4', 'Gm1992', 'Gm19938', 'Gm37381', 'Rp1']" - ] - }, - "metadata": {}, - "execution_count": 4 - } + "output_type": "execute_result", + "data": { + "text/plain": [ + "['Xkr4', 'Gm1992', 'Gm19938', 'Gm37381', 'Rp1']" ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Call The GeneWeaver ReST API\n", + "\n", + "First we'll construct our ReST call." + ], + "metadata": { + "id": "crIVfKOL7ESB" + } + }, + { + "cell_type": "code", + "source": [ + "payload = {\n", + " \"source_ids\": ids,\n", + " \"target_gene_id_type\": \"Ensemble Gene\",\n", + " \"species\": \"Mus Musculus\"\n", + "}" + ], + "metadata": { + "id": "qdMft1Un3K4-" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "The GeneWeaver API uses JSON, so let's specufy that in our request headers." + ], + "metadata": { + "id": "cLlJEtDi7k33" + } + }, + { + "cell_type": "code", + "source": [ + "headers = {\n", + " 'accept': 'application/json',\n", + " 'Content-Type': 'application/json'\n", + "}" + ], + "metadata": { + "id": "f-Y4Wrm93LSc" + }, + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "response = requests.post('https://geneweaver.jax.org/api/genes/mapping', json=payload, headers=headers)" + ], + "metadata": { + "id": "wSFcduFY3Npg" + }, + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Process Results\n", + "\n", + "The mapping endpoint will return a dictionary with a list of results available on the `gene_ids_map` key. Let's process that into a dictionary with the original IDs as keys and te new IDs as values." + ], + "metadata": { + "id": "TuO5nDxx7wNX" + } + }, + { + "cell_type": "code", + "source": [ + "mapping = {\n", + " r[\"original_ref_id\"]: r[\"mapped_ref_id\"]\n", + " for r in response.json()['gene_ids_map']\n", + "}" + ], + "metadata": { + "id": "bvkt1cef3OiA" + }, + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "We can now easily access a list of our mapped IDs." + ], + "metadata": { + "id": "jwg4YvS07-EK" + } + }, + { + "cell_type": "code", + "source": [ + "mapped_ids = list(mapping.values())" + ], + "metadata": { + "id": "Rm4fdKMt5c8L" + }, + "execution_count": 9, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "len(mapped_ids)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "KW-WnSfg4xau", + "outputId": "e2987b55-735b-4d14-e6c4-863a5346239f" + }, + "execution_count": 10, + "outputs": [ { - "cell_type": "markdown", - "source": [ - "### Call The GeneWeaver ReST API\n", - "\n", - "First we'll construct our ReST call." - ], - "metadata": { - "id": "crIVfKOL7ESB" - } - }, - { - "cell_type": "code", - "source": [ - "payload = {\n", - " \"source_ids\": ids,\n", - " \"target_gene_id_type\": \"Ensemble Gene\",\n", - " \"species\": \"Mus Musculus\"\n", - "}" - ], - "metadata": { - "id": "qdMft1Un3K4-" - }, - "execution_count": 5, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "The GeneWeaver API uses JSON, so let's specufy that in our request headers." - ], - "metadata": { - "id": "cLlJEtDi7k33" - } - }, - { - "cell_type": "code", - "source": [ - "headers = {\n", - " 'accept': 'application/json',\n", - " 'Content-Type': 'application/json'\n", - "}" - ], - "metadata": { - "id": "f-Y4Wrm93LSc" - }, - "execution_count": 6, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "response = requests.post('https://geneweaver.jax.org/api/genes/mapping', json=payload, headers=headers)" - ], - "metadata": { - "id": "wSFcduFY3Npg" - }, - "execution_count": 7, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "### Process Results\n", - "\n", - "The mapping endpoint will return a dictionary with a list of results available on the `gene_ids_map` key. Let's process that into a dictionary with the original IDs as keys and te new IDs as values." - ], - "metadata": { - "id": "TuO5nDxx7wNX" - } - }, - { - "cell_type": "code", - "source": [ - "mapping = {\n", - " r[\"original_ref_id\"]: r[\"mapped_ref_id\"]\n", - " for r in response.json()['gene_ids_map']\n", - "}" - ], - "metadata": { - "id": "bvkt1cef3OiA" - }, - "execution_count": 8, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "We can now easily access a list of our mapped IDs." - ], - "metadata": { - "id": "jwg4YvS07-EK" - } - }, - { - "cell_type": "code", - "source": [ - "mapped_ids = list(mapping.values())" - ], - "metadata": { - "id": "Rm4fdKMt5c8L" - }, - "execution_count": 9, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "len(mapped_ids)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "KW-WnSfg4xau", - "outputId": "e2987b55-735b-4d14-e6c4-863a5346239f" - }, - "execution_count": 10, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "32050" - ] - }, - "metadata": {}, - "execution_count": 10 - } + "output_type": "execute_result", + "data": { + "text/plain": [ + "32050" ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "source": [ + "mapped_ids[:5]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "NeNORUE35mnZ", + "outputId": "bb2dc823-b39a-47cb-97b8-9652c42bc11f" + }, + "execution_count": 11, + "outputs": [ { - "cell_type": "code", - "source": [ - "mapped_ids[:5]" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "NeNORUE35mnZ", - "outputId": "bb2dc823-b39a-47cb-97b8-9652c42bc11f" - }, - "execution_count": 11, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "['ENSMUSG00000066586',\n", - " 'ENSMUSG00000027596',\n", - " 'ENSMUSG00000030359',\n", - " 'ENSMUSG00000027597',\n", - " 'ENSMUSG00000019986']" - ] - }, - "metadata": {}, - "execution_count": 11 - } + "output_type": "execute_result", + "data": { + "text/plain": [ + "['ENSMUSG00000066586',\n", + " 'ENSMUSG00000027596',\n", + " 'ENSMUSG00000030359',\n", + " 'ENSMUSG00000027597',\n", + " 'ENSMUSG00000019986']" ] + }, + "metadata": {}, + "execution_count": 11 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "And we can see which IDs couldn't be mapped." + ], + "metadata": { + "id": "QgmtwpaQ8CL1" + } + }, + { + "cell_type": "code", + "source": [ + "unmapped_ids = [\n", + " _id for _id in ids if _id not in mapping\n", + "]\n" + ], + "metadata": { + "id": "qI-la0Fe3S6Y" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "len(unmapped_ids)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "j_hgdLrT4SJW", + "outputId": "7ab04cb4-eb7a-41e4-c0b3-ec75689d7565" + }, + "execution_count": 13, + "outputs": [ { - "cell_type": "markdown", - "source": [ - "And we can see which IDs couldn't be mapped." - ], - "metadata": { - "id": "QgmtwpaQ8CL1" - } - }, - { - "cell_type": "code", - "source": [ - "unmapped_ids = [\n", - " _id for _id in ids if _id not in mapping\n", - "]\n" - ], - "metadata": { - "id": "qI-la0Fe3S6Y" - }, - "execution_count": 12, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "len(unmapped_ids)" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "j_hgdLrT4SJW", - "outputId": "7ab04cb4-eb7a-41e4-c0b3-ec75689d7565" - }, - "execution_count": 13, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "235" - ] - }, - "metadata": {}, - "execution_count": 13 - } + "output_type": "execute_result", + "data": { + "text/plain": [ + "235" ] + }, + "metadata": {}, + "execution_count": 13 + } + ] + }, + { + "cell_type": "code", + "source": [ + "unmapped_ids[:5]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "ZXYJoV7G5hli", + "outputId": "8ce62cd9-afb5-4eeb-af77-c21e11e04c79" + }, + "execution_count": 14, + "outputs": [ { - "cell_type": "code", - "source": [ - "unmapped_ids[:5]" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ZXYJoV7G5hli", - "outputId": "8ce62cd9-afb5-4eeb-af77-c21e11e04c79" - }, - "execution_count": 14, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "['Gm28653', 'Ptp4a1.1', 'Arhgef4.1', 'Asdurf', 'AC169382.1']" - ] - }, - "metadata": {}, - "execution_count": 14 - } + "output_type": "execute_result", + "data": { + "text/plain": [ + "['Gm28653', 'Ptp4a1.1', 'Arhgef4.1', 'Asdurf', 'AC169382.1']" ] + }, + "metadata": {}, + "execution_count": 14 } - ] -} \ No newline at end of file + ] + } + ] +}