Skip to content

Commit

Permalink
start work ontology import
Browse files Browse the repository at this point in the history
  • Loading branch information
Corentin committed Sep 8, 2023
1 parent 38560a3 commit 7e6984b
Show file tree
Hide file tree
Showing 4 changed files with 298 additions and 4 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -178,4 +178,7 @@ docker/run.sh
IMPatienT
!data/images/demo_patient
.idea
.ruff_cache
.ruff_cache
data/backup/*
notebooks/*
!notebooks/*.ipynb
211 changes: 211 additions & 0 deletions notebooks/import_ontology.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"ename": "TypeError",
"evalue": "dumps() got multiple values for argument 'format'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[23], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpronto\u001b[39;00m \u001b[39mimport\u001b[39;00m Ontology\n\u001b[1;32m 2\u001b[0m go \u001b[39m=\u001b[39m Ontology(\u001b[39m\"\u001b[39m\u001b[39mgoslim_agr.obo\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m go_json \u001b[39m=\u001b[39m go\u001b[39m.\u001b[39;49mdumps(f, \u001b[39mformat\u001b[39;49m\u001b[39m=\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mjson\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n",
"\u001b[0;31mTypeError\u001b[0m: dumps() got multiple values for argument 'format'"
]
}
],
"source": [
"from pronto import Ontology\n",
"go = Ontology(\"goslim_agr.obo\")\n",
"go"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Serialise the loaded ontology to ms.json.\n",
"# Ontology.dump() writes to a binary file handle, whereas Ontology.dumps()\n",
"# accepts only a format and returns a string -- handing it the file handle\n",
"# raises \"TypeError: dumps() got multiple values for argument 'format'\".\n",
"with open(\"ms.json\", \"wb\") as f:\n",
" go.dump(f, format=\"json\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['nodes', 'edges', 'id', 'lbl', 'meta', 'equivalentNodesSets', 'logicalDefinitionAxioms', 'domainRangeAxioms', 'propertyChainAxioms'])"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"go[\"graphs\"][0].keys()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"\n",
"# Read ms.json back in; `go` is rebound to the parsed JSON document.\n",
"with open(\"ms.json\") as ms_file:\n",
" go = json.load(ms_file)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'definition': None,\n",
" 'comments': [],\n",
" 'subsets': ['chebi_ph7_3',\n",
" 'gocheck_do_not_annotate',\n",
" 'gocheck_do_not_manually_annotate',\n",
" 'goslim_agr',\n",
" 'goslim_aspergillus',\n",
" 'goslim_candida',\n",
" 'goslim_chembl',\n",
" 'goslim_drosophila',\n",
" 'goslim_flybase_ribbon',\n",
" 'goslim_generic',\n",
" 'goslim_metagenomics',\n",
" 'goslim_mouse',\n",
" 'goslim_pir',\n",
" 'goslim_plant',\n",
" 'goslim_pombe',\n",
" 'goslim_synapse',\n",
" 'goslim_yeast',\n",
" 'prokaryote_subset'],\n",
" 'xrefs': [],\n",
" 'synonyms': [],\n",
" 'basicPropertyValues': [{'pred': 'http://www.geneontology.org/formats/oboInOwl#hasOBOFormatVersion',\n",
" 'val': '1.2',\n",
" 'xrefs': [],\n",
" 'meta': None},\n",
" {'pred': 'http://purl.obolibrary.org/obo/owl_versionInfo',\n",
" 'val': '2023-07-27',\n",
" 'xrefs': [],\n",
" 'meta': None}],\n",
" 'version': 'http://purl.obolibrary.org/obo/go/subsets/goslim_agr/go/2023-07-27/subsets/goslim_agr.owl/go/subsets/goslim_agr.owl',\n",
" 'deprecated': False}"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"go[\"graphs\"][0][\"meta\"]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Term('GO:0000003', name='reproduction')\n",
"Term('GO:0002376', name='immune system process')\n",
"Term('GO:0003677', name='DNA binding')\n",
"Term('GO:0003700', name='DNA-binding transcription factor activity')\n",
"Term('GO:0003723', name='RNA binding')\n",
"Term('GO:0003824', name='catalytic activity')\n",
"Term('GO:0005102', name='signaling receptor binding')\n",
"Term('GO:0005198', name='structural molecule activity')\n",
"Term('GO:0005215', name='transporter activity')\n",
"Term('GO:0005576', name='extracellular region')\n",
"Term('GO:0005634', name='nucleus')\n",
"Term('GO:0005694', name='chromosome')\n",
"Term('GO:0005739', name='mitochondrion')\n",
"Term('GO:0005768', name='endosome')\n",
"Term('GO:0005773', name='vacuole')\n",
"Term('GO:0005783', name='endoplasmic reticulum')\n",
"Term('GO:0005794', name='Golgi apparatus')\n",
"Term('GO:0005829', name='cytosol')\n",
"Term('GO:0005856', name='cytoskeleton')\n",
"Term('GO:0005886', name='plasma membrane')\n",
"Term('GO:0005975', name='carbohydrate metabolic process')\n",
"Term('GO:0006259', name='DNA metabolic process')\n",
"Term('GO:0006629', name='lipid metabolic process')\n",
"Term('GO:0007049', name='cell cycle')\n",
"Term('GO:0007610', name='behavior')\n",
"Term('GO:0008092', name='cytoskeletal protein binding')\n",
"Term('GO:0008134', name='transcription factor binding')\n",
"Term('GO:0008283', name='cell population proliferation')\n",
"Term('GO:0008289', name='lipid binding')\n",
"Term('GO:0009056', name='catabolic process')\n",
"Term('GO:0012501', name='programmed cell death')\n",
"Term('GO:0016043', name='cellular component organization')\n",
"Term('GO:0016070', name='RNA metabolic process')\n",
"Term('GO:0019538', name='protein metabolic process')\n",
"Term('GO:0023052', name='signaling')\n",
"Term('GO:0030054', name='cell junction')\n",
"Term('GO:0030154', name='cell differentiation')\n",
"Term('GO:0030234', name='enzyme regulator activity')\n",
"Term('GO:0030246', name='carbohydrate binding')\n",
"Term('GO:0031410', name='cytoplasmic vesicle')\n",
"Term('GO:0032502', name='developmental process')\n",
"Term('GO:0032991', name='protein-containing complex')\n",
"Term('GO:0036094', name='small molecule binding')\n",
"Term('GO:0038023', name='signaling receptor activity')\n",
"Term('GO:0042592', name='homeostatic process')\n",
"Term('GO:0042995', name='cell projection')\n",
"Term('GO:0045202', name='synapse')\n",
"Term('GO:0050877', name='nervous system process')\n",
"Term('GO:0050896', name='response to stimulus')\n",
"Term('GO:0051234', name='establishment of localization')\n",
"Term('GO:0097367', name='carbohydrate derivative binding')\n",
"Term('GO:1901135', name='carbohydrate derivative metabolic process')\n",
"Term('GO:0046872', name='metal ion binding')\n"
]
}
],
"source": [
"for terms in go.terms():\n",
" print(terms)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.16"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
85 changes: 82 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ fr_core_news_sm = { url = "https://github.com/explosion/spacy-models/releases/do
Flask-Cors = "^3.0.10"
textacy = "^0.12.0"
bleach = "^5.0.1"
pronto = "^2.5.5"

[tool.poetry.group.dev.dependencies]
ruff = "^0.0.221"
Expand Down

0 comments on commit 7e6984b

Please sign in to comment.