Skip to content

Commit

Permalink
ontology conversion class
Browse files Browse the repository at this point in the history
  • Loading branch information
Corentin committed Sep 11, 2023
1 parent fe848a0 commit 00e6f56
Showing 1 changed file with 97 additions and 164 deletions.
261 changes: 97 additions & 164 deletions notebooks/import_ontology.ipynb
Original file line number Diff line number Diff line change
@@ -1,86 +1,93 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Parse the Gene Ontology OBO file with pronto; the last expression\n",
"# displays the Ontology object in the notebook output.\n",
"from pronto import Ontology\n",
"go = Ontology(\"go.obo\")\n",
"go"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Export the parsed ontology as obographs JSON.\n",
"# Binary mode (\"wb\") because pronto's dump writes bytes.\n",
"with open(\"ms.json\", \"wb\") as f:\n",
" go.dump(f, format=\"json\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Re-load the exported obographs JSON as plain dicts/lists.\n",
"# NOTE: this rebinds `go` from a pronto Ontology to a dict.\n",
"import json\n",
"with open(\"ms.json\", \"r\") as f:\n",
" go = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Inspect the top-level keys of the first obographs graph.\n",
"go[\"graphs\"][0].keys()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Inspect the first node to see the obographs node structure.\n",
"go[\"graphs\"][0][\"nodes\"][0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Build a child -> [(parent_id, predicate)] adjacency map from the\n",
"# obographs edge list. Edge subjects/objects are full IRIs, so\n",
"# split(\"/\")[-1] keeps only the trailing term id.\n",
"edge_dict: dict = {}\n",
"for relationship in go[\"graphs\"][0][\"edges\"]:\n",
" parent_list = edge_dict.get(relationship[\"sub\"].split(\"/\")[-1], [])\n",
" parent_list.append((relationship[\"obj\"].split(\"/\")[-1], relationship[\"pred\"]))\n",
" edge_dict[relationship[\"sub\"].split(\"/\")[-1]] = parent_list"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Inspect the child -> parents adjacency map built above.\n",
"edge_dict"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for go_term in go[\"graphs\"][0][\"nodes\"]:\n",
" if go_term[\"type\"] != \"CLASS\":\n",
" print(go_term)"
import json
import random


class ImpatientVocab:
    """Two-way converter between the IMPatienT vocabulary JSON tree
    (jstree-style node dicts) and a pronto ``Ontology`` (OBO).

    pronto is imported lazily inside the methods that need it, so the
    pure-JSON helpers (``load_json``/``dump_json``/``_generate_hex_color``)
    work even where pronto is not installed.
    """

    def __init__(self) -> None:
        # Hex colors already handed out; _generate_hex_color keeps them unique.
        self.used_colors: list[str] = []
        # Vocabulary as a list of jstree-style node dicts.
        self.impatient_json: list[dict] = []
        # pronto Ontology counterpart; set by load_ontology()/json_to_onto().
        self.impatient_onto: "Ontology | None" = None
        # pronto Term objects created by json_to_onto(), in insertion order.
        # (The original annotated this list[str], but it holds Term objects.)
        self.list_of_terms: list = []

    def load_json(self, path: str) -> list[dict]:
        """Read the vocabulary JSON file at *path* and return the node list."""
        # `with` guarantees the handle is closed (the old json.load(open(...))
        # leaked the file object).
        with open(path, "r") as f:
            self.impatient_json = json.load(f)
        return self.impatient_json

    def load_ontology(self, path: str) -> "Ontology":
        """Parse an ontology file (any format pronto supports) from *path*."""
        from pronto import Ontology  # deferred: only needed for ontology I/O
        self.impatient_onto = Ontology(path)
        return self.impatient_onto

    def json_to_onto(self) -> "Ontology":
        """Build a pronto Ontology from the loaded vocabulary JSON.

        Assumes parents appear before their children in the node list,
        because each node's parent is looked up in the ontology being built.
        """
        from pronto import Definition, Ontology  # deferred import
        self.impatient_onto = Ontology()
        for term in self.impatient_json:
            # Vocabulary ids use "_" where OBO ids use ":".
            added_term = self.impatient_onto.create_term(term["id"].replace("_", ":"))
            added_term.name = term["text"]
            # "synonymes" is a comma-separated string; skip empty entries.
            for syn in term["data"]["synonymes"].split(","):
                if syn != "":
                    added_term.add_synonym(syn, scope="EXACT")
            if term["data"]["description"] != "":
                added_term.definition = Definition(term["data"]["description"])
            # "#" is the jstree marker for a root node (no parent).
            if term["parent"] != "#":
                added_term.superclasses().add(
                    self.impatient_onto[term["parent"].replace("_", ":")]
                )
            self.list_of_terms.append(added_term)
        return self.impatient_onto

    def onto_to_json(self) -> list[dict]:
        """Flatten the loaded ontology into the vocabulary JSON node list."""
        self.impatient_json = []
        for index, term in enumerate(self.impatient_onto.terms()):
            # superclasses() yields the term itself first; drop it so only
            # the real ancestors remain (same effect as the old pop(0)).
            ancestors = [sup.id for sup in term.superclasses()][1:]
            self.impatient_json.append(
                {
                    "id": term.id.replace("_", ":"),
                    "text": term.name,
                    "icon": True,
                    "data": {
                        # str() so the dumped JSON holds a plain string, not
                        # a pronto Definition instance.
                        "description": str(term.definition) if term.definition is not None else "",
                        "synonymes": ",".join(syn.description for syn in term.synonyms),
                        "phenotype_datamined": "",
                        "gene_datamined": "",
                        "alternative_language": term.name,
                        "correlates_with": "",
                        # Only the first exported node is flagged for image
                        # annotation, as in the original.
                        "image_annotation": index == 0,
                        "hex_color": self._generate_hex_color(),
                        "hpo_datamined": "",
                    },
                    # The JSON tree is single-parent: keep only the first
                    # direct ancestor, deliberately dropping the rest.
                    "parent": ancestors[0].replace("_", ":") if ancestors else "#",
                }
            )
        return self.impatient_json

    def _generate_hex_color(self) -> str:
        """Return a random "#rrggbb" color never returned before by this instance."""
        while True:
            color = "#{:06x}".format(random.randint(0, 0xFFFFFF))
            if color not in self.used_colors:
                self.used_colors.append(color)
                return color

    def dump_onto(self, path: str) -> None:
        """Serialize the ontology to *path* in OBO format (pronto writes bytes)."""
        with open(path, "wb") as f:
            self.impatient_onto.dump(f, format="obo")

    def dump_json(self, path: str) -> None:
        """Serialize the vocabulary node list to *path* as indented JSON."""
        with open(path, "w") as f:
            json.dump(self.impatient_json, f, indent=2)
]
},
{
Expand All @@ -89,101 +96,22 @@
"metadata": {},
"outputs": [],
"source": [
"names: list[str] = []\n",
"id: list[str] = []\n",
"desc: list[str] = []\n",
"synonymes: list[list[str]] = []\n",
"\n",
"for go_term in go[\"graphs\"][0][\"nodes\"]:\n",
" if go_term[\"type\"] == \"CLASS\":\n",
" id.append(go_term[\"id\"].split(\"/\")[-1])\n",
" names.append(go_term[\"lbl\"])\n",
" desc.append(go_term[\"meta\"][\"definition\"][\"val\"])\n",
" synonymes.append([syn[\"val\"] for syn in go_term[\"meta\"][\"synonyms\"]])"
"# Demo vocabulary JSON -> pronto Ontology -> OBO file on disk.\n",
"my_onto = ImpatientVocab()\n",
"my_onto.load_json(\"ontology.json.demo\")\n",
"my_onto.json_to_onto()\n",
"my_onto.dump_onto(\"ontology_imp.obo\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import jsonschema\n",
"from jsonschema import validate\n",
"\n",
"impatient_json: list[dict] = []\n",
"impatient_json_schema = {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"id\": {\"type\": \"string\"},\n",
" \"text\": {\"type\": \"string\"},\n",
" \"icon\": {\"type\": \"boolean\"},\n",
" \"data\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"description\": {\"type\": \"string\"},\n",
" \"synonymes\": {\"type\": \"string\"},\n",
" \"phenotype_datamined\": {\"type\": \"string\"},\n",
" \"gene_datamined\": {\"type\": \"string\"},\n",
" \"alternative_language\": {\"type\": \"string\"},\n",
" \"correlates_with\": {\"type\": \"string\"},\n",
" \"image_annotation\": {\"type\": \"boolean\"},\n",
" \"hex_color\": {\"type\": \"string\", \"pattern\": \"^#[0-9a-fA-F]{6}$\"},\n",
" \"hpo_datamined\": {\"type\": \"string\"},\n",
" },\n",
" \"required\": [\n",
" \"description\",\n",
" \"synonymes\",\n",
" \"phenotype_datamined\",\n",
" \"gene_datamined\",\n",
" \"alternative_language\",\n",
" \"correlates_with\",\n",
" \"image_annotation\",\n",
" \"hex_color\",\n",
" \"hpo_datamined\",\n",
" ],\n",
" },\n",
" \"parent\": {\"type\": \"string\"},\n",
" },\n",
" \"required\": [\"id\", \"text\", \"icon\", \"data\", \"parent\"],\n",
"}\n",
"\n",
"for index in range(len(id)):\n",
" impatient_json.append(\n",
" {\n",
" \"id\": id[index].replace(\"_\", \":\"),\n",
" \"text\": names[index],\n",
" \"icon\": True,\n",
" \"data\": {\n",
" \"description\": desc[index],\n",
" \"synonymes\": ','.join(synonymes[index]),\n",
" \"phenotype_datamined\": \"\",\n",
" \"gene_datamined\": \"\",\n",
" \"alternative_language\": names[index],\n",
" \"correlates_with\": \"\",\n",
" \"image_annotation\": True if index==0 else False,\n",
" \"hex_color\": \"#FFFFFF\",\n",
" \"hpo_datamined\": \"\",\n",
" },\n",
" \"parent\": \"#\",\n",
" }\n",
" )\n",
" \n",
"for child, parent in edge_dict.items():\n",
" try:\n",
" index_term = id.index(child)\n",
" except ValueError:\n",
" print(f\"Term {child} not found in the list of terms\")\n",
" continue\n",
" # Only one parent so yeah we are loosing information.\n",
" impatient_json[index_term][\"parent\"] = parent[0][0].replace(\"_\", \":\")"
"# GO slim OBO ontology -> IMPatienT vocabulary JSON.\n",
"my_onto = ImpatientVocab()\n",
"my_onto.load_ontology(\"goslim_agr.obo\")\n",
"my_onto.onto_to_json()\n",
"my_onto.dump_json(\"obo_to_json_GO.json\")"
]
},
{
Expand All @@ -192,7 +120,10 @@
"metadata": {},
"outputs": [],
"source": [
"json.dump(impatient_json, open(\"impatient.json\", \"w\"))"
"# Round-trip check: the OBO file written by the earlier demo cell,\n",
"# converted back into vocabulary JSON.\n",
"my_onto = ImpatientVocab()\n",
"my_onto.load_ontology(\"ontology_imp.obo\")\n",
"my_onto.onto_to_json()\n",
"my_onto.dump_json(\"obo_to_json_IMP.json\")"
]
},
{
Expand All @@ -201,8 +132,10 @@
"metadata": {},
"outputs": [],
"source": [
"for idx, json_data in enumerate(impatient_json, start=1):\n",
" validate(instance=json_data, schema=impatient_json_schema)"
"# HPO -> vocabulary JSON. NOTE(review): input is hp.owl — assumes pronto\n",
"# can parse this OWL file; confirm, or use the OBO release of HPO instead.\n",
"my_onto = ImpatientVocab()\n",
"my_onto.load_ontology(\"hp.owl\")\n",
"my_onto.onto_to_json()\n",
"my_onto.dump_json(\"obo_to_json_HPO.json\")"
]
}
],
Expand Down

0 comments on commit 00e6f56

Please sign in to comment.