Skip to content

Commit

Permalink
upload and download ontologies format
Browse files Browse the repository at this point in the history
  • Loading branch information
Corentin committed Sep 28, 2023
1 parent 00e6f56 commit a87f472
Show file tree
Hide file tree
Showing 11 changed files with 447 additions and 23 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,6 @@ IMPatienT
.ruff_cache
data/backup/*
notebooks/*
!notebooks/*.ipynb
!notebooks/*.ipynb
profile.json
profile.html
21 changes: 13 additions & 8 deletions app/dashapp/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,19 @@ def get_external_stylesheets():
dbc.Row(
[
dbc.Col(
html.P(
"This is the image annotation tool interface. "
"Select the standard vocabulary term and draw on the image to annotate parts of the image. "
"Then check the 'Compute Segmentation' tickbox to automatically expands your annotations to the whole image. "
"You may add more marks to clarify parts of the image where the classifier was not successful",
"and the classification will update. Once satisfied with the annotations area you can click the"
"'Save Annotation To Database' to save your annotations.",
),
[
html.B(
"If your image is not displayed please hit F5 to refresh the page, it should solve most issues."
),
html.P(
"This is the image annotation tool interface. "
"Select the standard vocabulary term and draw on the image to annotate parts of the image. "
"Then check the 'Compute Segmentation' tickbox to automatically expands your annotations to the whole image. "
"You may add more marks to clarify parts of the image where the classifier was not successful",
"and the classification will update. Once satisfied with the annotations area you can click the"
"'Save Annotation To Database' to save your annotations.",
),
],
md=True,
),
]
Expand Down
3 changes: 1 addition & 2 deletions app/historeport/ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ def __init__(self, file_obj, lang):
self.ontology_path = os.path.join(
current_app.config["ONTOLOGY_FOLDER"], "ontology.json"
)
self.image_stack = []
self.raw_text = ""
self.text_as_list = []
self.sentence_as_list = []
Expand All @@ -43,6 +42,7 @@ def __init__(self, file_obj, lang):
self.negex_sent = current_app.config["NEGEX_SENT_EN"]
self.all_stopwords = self.nlp.Defaults.stop_words
self.results_match_dict = {}
self.image_stack = convert_from_bytes(self.file_obj.read())

def get_grayscale(self, image):
"""Convert an image as numpy array to grayscale
Expand Down Expand Up @@ -73,7 +73,6 @@ def pdf_to_text(self):
Returns:
str: raw text as a string
"""
self.image_stack = convert_from_bytes(self.file_obj.read())
page_list = []
# Loop on each image (page) of the PDF file
for image in self.image_stack:
Expand Down
123 changes: 123 additions & 0 deletions app/historeport/onto_func.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,127 @@
import json
import random
from pronto import Ontology, Definition
import io
from flask_wtf.file import FileField


class ImpatientVocab:
    """Hold one vocabulary both as a JSON node list and as a pronto ontology.

    The JSON form is a flat list of node dicts with the keys ``id``, ``text``,
    ``icon``, ``data`` and ``parent`` (``"#"`` marks a node without parent).
    The ontology form is a ``pronto.Ontology``. ``json_to_onto`` and
    ``onto_to_json`` convert between the two; the ``load_*`` and ``dump_*``
    methods read and write either form.
    """

    def __init__(self) -> None:
        # Hex colours already handed out by _generate_hex_color()
        self.used_colors: list[str] = []
        # Vocabulary as a list of node dicts
        self.impatient_json: list[dict] = []
        # Vocabulary as a pronto ontology (None until loaded or converted)
        self.impatient_onto: Ontology = None
        # Terms appended by json_to_onto(); never cleared by this class
        self.list_of_terms: list[str] = []

    def load_json(self, path: str) -> list[dict]:
        """Load the JSON vocabulary stored at ``path``.

        Args:
            path (str): path of the JSON file to read

        Returns:
            list[dict]: the parsed JSON, also stored in ``impatient_json``
        """
        # Context manager so the handle is closed even if parsing fails
        with open(path, "r") as json_file:
            self.impatient_json = json.load(json_file)
        return self.impatient_json

    def load_ontology(self, path: str) -> Ontology:
        """Load an ontology from ``path`` with pronto.

        Args:
            path (str): path handed to ``pronto.Ontology``

        Returns:
            Ontology: the loaded ontology, also stored in ``impatient_onto``
        """
        self.impatient_onto = Ontology(path)
        return self.impatient_onto

    def load_json_f(self, file: FileField) -> list[dict]:
        """Load the JSON vocabulary from an already opened file object.

        Args:
            file (FileField): object whose ``read()`` returns the JSON document

        Returns:
            list[dict]: the parsed JSON, also stored in ``impatient_json``
        """
        json_data = json.loads(file.read())
        self.impatient_json = json_data
        return json_data

    def load_ontology_f(self, file: FileField) -> Ontology:
        """Load an ontology from an already opened file object.

        Args:
            file (FileField): object whose ``read()`` returns the ontology bytes

        Returns:
            Ontology: the loaded ontology, also stored in ``impatient_onto``
        """
        # The content is buffered in memory and handed to pronto as a binary stream
        ontology_data = io.BytesIO(file.read())
        ontology = Ontology(ontology_data)
        self.impatient_onto = ontology
        return ontology

    def json_to_onto(self) -> Ontology:
        """Build a new ontology from ``impatient_json``.

        Node ids and parent ids have ``_`` replaced by ``:`` to form term ids.
        Terms are created in a first pass and linked to their parent in a
        second pass, so a child may appear before its parent in the JSON.

        Returns:
            Ontology: the new ontology, also stored in ``impatient_onto``
        """
        self.impatient_onto = Ontology()
        # Maps each term id to the term created for it
        term_mapping = {}

        # First pass: create the terms without any superclass
        for term in self.impatient_json:
            term_id = term["id"].replace("_", ":")
            added_term = self.impatient_onto.create_term(term_id)
            added_term.name = term["text"]
            # Synonyms are stored as one comma separated string; blanks are skipped
            for syn in term["data"]["synonymes"].split(","):
                if syn.strip() != "":
                    added_term.add_synonym(syn.strip(), scope="EXACT")
            if term["data"]["description"] != "":
                added_term.definition = Definition(term["data"]["description"])

            term_mapping[term_id] = added_term

        # Second pass: attach each term to its parent
        for term in self.impatient_json:
            term_id = term["id"].replace("_", ":")
            added_term = term_mapping[term_id]

            # "#" marks a node without parent
            if term["parent"] != "#":
                parent_id = term["parent"].replace("_", ":")
                # A parent id that matches no created term is silently ignored
                parent_term = term_mapping.get(parent_id)
                if parent_term:
                    added_term.superclasses().add(parent_term)

            self.list_of_terms.append(added_term)

        return self.impatient_onto

    def onto_to_json(self) -> list[dict]:
        """Rebuild ``impatient_json`` from the terms of ``impatient_onto``.

        Every term becomes one node dict with a freshly generated colour.
        Only the first term yielded by the ontology gets
        ``image_annotation`` set to True.

        Returns:
            list[dict]: the new node list, also stored in ``impatient_json``
        """
        self.impatient_json = []
        for index, term in enumerate(self.impatient_onto.terms()):
            relationships = [rel.id for rel in term.superclasses()]
            # The first yielded entry is dropped: presumably the term itself,
            # as pronto includes it by default - TODO confirm
            relationships.pop(0)
            name = term.name if term.name is not None else ""
            self.impatient_json.append(
                {
                    "id": term.id.replace("_", ":"),
                    "text": name,
                    "icon": True,
                    "data": {
                        "description": term.definition
                        if term.definition is not None
                        else "",
                        "synonymes": ",".join(
                            [syn.description for syn in term.synonyms]
                        ),
                        "phenotype_datamined": "",
                        "gene_datamined": "",
                        "alternative_language": name,
                        "correlates_with": "",
                        "image_annotation": index == 0,
                        "hex_color": self._generate_hex_color(),
                        "hpo_datamined": "",
                    },
                    # First remaining superclass, or "#" when there is none
                    "parent": relationships[0].replace("_", ":")
                    if relationships
                    else "#",
                }
            )
        return self.impatient_json

    def _generate_hex_color(self):
        """Return a random ``#rrggbb`` colour not yet present in ``used_colors``.

        Returns:
            str: the new colour, which is also appended to ``used_colors``
        """
        while True:
            color = "#{:06x}".format(random.randint(0, 0xFFFFFF))
            # Draw again until the colour has not been handed out before
            if color not in self.used_colors:
                self.used_colors.append(color)
                return color

    def dump_onto(self, path: str) -> None:
        """Write ``impatient_onto`` to ``path`` in OBO format.

        Args:
            path (str): path of the file to write
        """
        # The file is opened in binary mode for pronto's dump()
        with open(path, "wb") as f:
            self.impatient_onto.dump(f, format="obo")

    def dump_json(self, path: str) -> None:
        """Write ``impatient_json`` to ``path`` as JSON indented by 2 spaces.

        Args:
            path (str): path of the file to write
        """
        with open(path, "w") as f:
            json.dump(self.impatient_json, f, indent=2)


class StandardVocabulary:
Expand Down
21 changes: 21 additions & 0 deletions app/ontocreate/forms.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,28 @@
from flask_wtf import FlaskForm
from flask_wtf.file import FileAllowed, FileField, FileRequired
from wtforms.validators import DataRequired
from wtforms import StringField, SubmitField, TextAreaField, BooleanField


class OntoUpload(FlaskForm):
    """Form for uploading a new ontology (standard vocabulary) file.

    The form has a single file input restricted to json, obo and owl files,
    plus a submit button. No ``FileRequired`` validator is attached to the
    file field.
    """

    # File input: FileAllowed lists json, obo and owl as accepted uploads and
    # carries the error message used for anything else.
    onto_file = FileField(
        validators=[
            FileAllowed(
                ["json", "obo", "owl"],
                "This file is not a valid ontology file !",
            ),
        ],
        render_kw={"class": "form-control-file border"},
    )
    # Submit button labelled "Confirm Upload"
    submit = SubmitField("Confirm Upload", render_kw={"class": "btn btn-warning"})


class OntologyDescript(FlaskForm):
"""Form used to show and save modification of nodes from the standard vocabulary
tree in the standard vocabulary creator module
Expand Down
89 changes: 83 additions & 6 deletions app/ontocreate/routes.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import json
import os
import jsonschema
from jsonschema import validate

import bleach

from werkzeug.utils import secure_filename
from app import db
from app.historeport.onto_func import StandardVocabulary
from app.historeport.onto_func import StandardVocabulary, ImpatientVocab
from app.models import ReportHisto
from app.ontocreate import bp
from app.ontocreate.forms import InvertLangButton, OntologyDescript
from app.ontocreate.forms import InvertLangButton, OntologyDescript, OntoUpload
from app.histostats.vizualisation import (
db_to_df,
table_to_df,
Expand Down Expand Up @@ -50,7 +52,50 @@ def ontocreate():
"""
form = OntologyDescript()
form2 = InvertLangButton()
return render_template("ontocreate.html", form=form, form2=form2)
form_onto = OntoUpload()
if form_onto.validate_on_submit() and form_onto.onto_file.data:
# Get the uploaded file
uploaded_file = form_onto.onto_file.data
# Check if the file has an allowed extension
if uploaded_file.filename[-4:] == "json":
onto_data = ImpatientVocab()
onto_data.load_json_f(uploaded_file)

else:
onto_data = ImpatientVocab()
onto_data.load_ontology_f(uploaded_file)
onto_data.onto_to_json()

validate(
instance=onto_data.impatient_json,
schema=current_app.config["ONTO_SCHEMA"],
)
onto_data.impatient_json[0]["data"]["image_annotation"] = True
file_path = os.path.join(current_app.config["ONTOLOGY_FOLDER"], "ontology.json")
flag_valid = True

if flag_valid:
onto_data.dump_json(file_path)
for report in ReportHisto.query.all():
report.ontology_tree = onto_data.impatient_json
flag_modified(report, "ontology_tree")
db.session.commit()
# Update The DashApp Callback & layout
# By Force reloading the layout code & callbacks
dashapp = current_app.config["DASHAPP"]
with current_app.app_context():
import importlib
import sys

importlib.reload(sys.modules["app.dashapp.callbacks"])
import app.dashapp.layout

importlib.reload(app.dashapp.layout)
dashapp.layout = app.dashapp.layout.layout
return redirect(url_for("ontocreate.ontocreate"))
return render_template(
"ontocreate.html", form=form, form2=form2, form_onto=form_onto
)


@bp.route("/modify_onto", methods=["PATCH"])
Expand Down Expand Up @@ -88,8 +133,10 @@ def modify_onto():
template_ontology = StandardVocabulary(clean_tree)
for report in ReportHisto.query.all():
current_report_ontology = StandardVocabulary(report.ontology_tree)
updated_report_ontology = json.loads(bleach.clean(
json.dumps(current_report_ontology.update_ontology(template_ontology)))
updated_report_ontology = json.loads(
bleach.clean(
json.dumps(current_report_ontology.update_ontology(template_ontology))
)
)
# Issue: SQLAlchemy not updating JSON https://stackoverflow.com/questions/42559434/updates-to-json-field-dont-persist-to-db

Expand Down Expand Up @@ -125,6 +172,36 @@ def download_onto():
)


@bp.route("/upload_onto", methods=["POST"])
@login_required
def upload_onto():
    """POST route registered at ``/upload_onto``.

    NOTE(review): despite its name this handler reads no uploaded file; it
    only sends back the stored ``ontology.json`` - confirm the intent.

    Returns:
        File: ``ontology.json`` from the ontology folder, as an attachment
    """
    ontology_folder = current_app.config["ONTOLOGY_FOLDER"]
    response = send_from_directory(
        ontology_folder, "ontology.json", as_attachment=True
    )
    return response


@bp.route("/download_onto_as_obo", methods=["GET"])
@login_required
def download_onto_as_obo():
    """Route to download the standard vocabulary as an OBO file.

    The stored ``ontology.json`` is converted to an ontology and written to
    ``ontology.obo`` in the same folder, then that file is sent back.

    Returns:
        File: ``ontology.obo`` from the ontology folder, as an attachment
    """
    vocab = ImpatientVocab()
    onto_dir = current_app.config["ONTOLOGY_FOLDER"]
    json_path = os.path.join(onto_dir, "ontology.json")
    obo_path = os.path.join(onto_dir, "ontology.obo")

    # JSON -> ontology -> OBO file on disk
    vocab.load_json(json_path)
    vocab.json_to_onto()
    vocab.dump_onto(obo_path)

    return send_from_directory(onto_dir, "ontology.obo", as_attachment=True)


@bp.route("/invert_lang", methods=["POST"])
@login_required
def invert_lang():
Expand Down
38 changes: 37 additions & 1 deletion app/ontocreate/templates/ontocreate.html
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,42 @@
<h1>Standard Vocabulary Tree</h1>
<a href="{{ url_for('ontocreate.download_onto') }}"><i class="fa-solid fa-download"></i> Download Vocabulary (.JSON)
</a>
<a href="{{ url_for('ontocreate.download_onto_as_obo') }}"><i class="fa-solid fa-download"></i> Download Vocabulary
(.OBO)
</a>

<!-- Button Show Popup -->
<button type="button" class="btn btn-danger btn-sm" data-bs-toggle="modal" data-bs-target="#RepredictPopup"><i
class="fa-solid fa-upload"></i>
Upload Vocab (Experimental)
</button>

<!-- Vocabulary Upload Confirmation Popup -->
<div class="modal fade" id="RepredictPopup" tabindex="-1" aria-labelledby="RepredictPopupLabel" aria-hidden="true">
<div class="modal-dialog">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title" id="RepredictPopupLabel">Confirm Upload Vocabulary</h5>

<button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
</div>
<div class="modal-footer">
<p>This will replace the current vocabulary with your custom file. Please note that this will DELETE all
term annotations for each patient because you are completely replacing your previous vocabulary. You will have
to re-annotate all patients in the database. This feature is experimental and might not work with all
vocabularies in OWL and OBO formats. In particular, it has issues with large ones (>1000 classes). In case of
error, the vocabulary will not be switched.</p>
<form action="" method="post" enctype="multipart/form-data" style="display: inline">
{{ form_onto.hidden_tag() }} {{ form_onto.onto_file }} {{ form_onto.submit }}
</form>
<button type="button" class="btn btn-secondary" data-bs-dismiss="modal">
Close
</button>
</div>
</div>
</div>
</div>

<input type="text" id="plugins4_q" value="" class="form-control" placeholder="Search" />
<div id="jstree" class="demo" style="overflow: scroll; max-height: 600px"></div>

Expand Down Expand Up @@ -97,4 +133,4 @@ <h1>Vocabulary Properties</h1>
<meta id="data-url" data-jstree="{{url_for('ontocreate.onto_json', filename='ontology.json')}}"
data-savetree="{{url_for('ontocreate.modify_onto')}}" />
<script src="{{ url_for('ontocreate.static', filename='ontocreate.js') }}"></script>
{% endblock %}
{% endblock %}
Loading

0 comments on commit a87f472

Please sign in to comment.