diff --git a/bdikit/visualization/scope_reducing.py b/bdikit/visualization/scope_reducing.py
index 64ba9765..0256d240 100644
--- a/bdikit/visualization/scope_reducing.py
+++ b/bdikit/visualization/scope_reducing.py
@@ -1,4 +1,5 @@
import json
+import logging
import altair as alt
import numpy as np
@@ -11,6 +12,8 @@
pn.extension("mathjax")
pn.extension("vega")
+logger = logging.getLogger(__name__)
+
class SRHeatMapManager:
def __init__(self) -> None:
@@ -23,6 +26,9 @@ def __init__(self) -> None:
self.rec_cols_gdc = None
self.clusters = None
+ # Heatmap row currently selected by the user (None until a cell is clicked)
+ self.selected_row = None
+
def _load_json(self):
with open(self.json_path) as f:
data = json.load(f)
@@ -32,7 +38,8 @@ def _write_json(self, data):
with open(self.json_path, "w") as f:
json.dump(data, f)
- def get_heatmap(self, recommendations):
+ def get_heatmap(self):
+ recommendations = self._load_json()
rec_cols = set()
rec_table = []
rec_list = []
@@ -124,6 +131,48 @@ def _get_column_values(self, properties):
else:
return None
+    def _accept_match(self, col_name=None, match_name=None):
+        """Keep only the accepted recommendation for a source column.
+
+        Loads the stored recommendations and, for an entry whose
+        "Candidate column" equals ``col_name``, replaces its "Top k columns"
+        list with the single ``[name, score]`` pair whose name equals
+        ``match_name``. The result is written back and the heatmap data is
+        rebuilt through ``get_heatmap``. Nothing is written when no such
+        pair is found.
+
+        Args:
+            col_name: Source column to accept a match for. When omitted it
+                is read from the first row of ``self.selected_row``.
+            match_name: Recommended column to accept. When omitted it is
+                read from the first row of ``self.selected_row``.
+        """
+        if col_name is None or match_name is None:
+            # Only fall back to the current selection for the names the
+            # caller did not supply; explicit arguments always win.
+            if self.selected_row is None:
+                return
+            if col_name is None:
+                col_name = self.selected_row["Column"].values[0]
+            if match_name is None:
+                match_name = self.selected_row["Recommendation"].values[0]
+
+        recommendations = self._load_json()
+        for idx, d in enumerate(recommendations):
+            candidate_name = d["Candidate column"]
+            if candidate_name != col_name:
+                continue
+            for top_k_name, top_k_score in d["Top k columns"]:
+                if top_k_name == match_name:
+                    # Collapse the candidate list to the accepted pair only.
+                    recommendations[idx] = {
+                        "Candidate column": candidate_name,
+                        "Top k columns": [[top_k_name, top_k_score]],
+                    }
+                    self._write_json(recommendations)
+                    self.get_heatmap()
+                    return
+
+    def _reject_match(self):
+        """Drop the currently selected recommendation from its source column.
+
+        The column / recommendation pair is read from the first row of
+        ``self.selected_row``. For the first stored entry whose
+        "Candidate column" equals that column, every "Top k columns" pair
+        naming the rejected recommendation is removed; the data is then
+        written back and the heatmap data rebuilt through ``get_heatmap``.
+        Does nothing when there is no selection or no matching entry.
+        """
+        selection = self.selected_row
+        if selection is None:
+            return
+        source_column = selection["Column"].values[0]
+        rejected = selection["Recommendation"].values[0]
+
+        stored = self._load_json()
+        for position, entry in enumerate(stored):
+            if entry["Candidate column"] == source_column:
+                # Keep every candidate pair except the rejected one.
+                remaining = [
+                    [name, score]
+                    for name, score in entry["Top k columns"]
+                    if name != rejected
+                ]
+                stored[position] = {
+                    "Candidate column": entry["Candidate column"],
+                    "Top k columns": remaining,
+                }
+                self._write_json(stored)
+                self.get_heatmap()
+                return
+
def get_clusters(self):
words = self.rec_table_df["Column"].to_numpy()
lev_similarity = -1 * np.array(
@@ -136,19 +185,52 @@ def get_clusters(self):
)
affprop.fit(lev_similarity)
- print(f"Number of clusters: {np.unique(affprop.labels_).shape[0]}\n")
+ logger.debug(f"Number of clusters: {np.unique(affprop.labels_).shape[0]}\n")
cluster_names = []
clusters = {}
for cluster_id in np.unique(affprop.labels_):
exemplar = words[affprop.cluster_centers_indices_[cluster_id]]
cluster = np.unique(words[np.nonzero(affprop.labels_ == cluster_id)])
cluster_str = ", ".join(cluster)
- print(" - *%s:* %s" % (exemplar, cluster_str))
+ logger.debug(" - *%s:* %s" % (exemplar, cluster_str))
cluster_names.append(exemplar)
clusters[exemplar] = cluster
self.clusters = clusters
- def _plot_heatmap(self, clusters=[], subschemas=[], threshold=0.5):
+    def _plot_heatmap_base(self, heatmap_rec_list):
+        """Build the selectable heatmap pane for the given recommendation rows.
+
+        Args:
+            heatmap_rec_list: Data handed to ``alt.Chart``; the encodings
+                read its ``Column``, ``Recommendation`` and ``Value`` fields.
+
+        Returns:
+            A ``pn.pane.Vega`` wrapping the chart, with a point selection
+            parameter named "single" attached to it.
+        """
+        # The selection is named so it can be looked up on the resulting
+        # pane (``pane.selection.param.single``).
+        single = alt.selection_point(name="single")
+        base = (
+            alt.Chart(heatmap_rec_list)
+            .mark_rect(size=100)
+            .encode(
+                # Use the channel class that matches each axis.
+                y=alt.Y("Column:O", sort=None),
+                x=alt.X("Recommendation:O", sort=None),
+                # Color by Value where the selection predicate holds,
+                # light gray otherwise.
+                color=alt.condition(single, "Value:Q", alt.value("lightgray")),
+                tooltip=[
+                    alt.Tooltip("Column", title="Column"),
+                    alt.Tooltip("Recommendation", title="Recommendation"),
+                    alt.Tooltip("Value", title="Value"),
+                ],
+            )
+            .add_params(single)
+        )
+        return pn.pane.Vega(base)
+
+    def _plot_selected_row(self, heatmap_rec_list, selection):
+        """Show the heatmap rows picked by the current selection.
+
+        Args:
+            heatmap_rec_list: Table the heatmap was built from; indexed
+                positionally with ``.iloc``.
+            selection: Positional row indices of the selected cells. Any
+                falsy value (``None``, empty list) means nothing is selected.
+
+        Returns:
+            The markdown string "## No selection" when nothing is selected,
+            otherwise a ``pn.widgets.DataFrame`` showing the selected rows.
+        """
+        if not selection:
+            return "## No selection"
+        selected_row = heatmap_rec_list.iloc[selection]
+        # Remember the selection so the accept / reject handlers can act on it.
+        self.selected_row = selected_row
+        return pn.widgets.DataFrame(selected_row)
+
+ def _plot_pane(
+ self, clusters=[], subschemas=[], threshold=0.5, acc_click=0, rej_click=0
+ ):
heatmap_rec_list = self.rec_list_df[self.rec_list_df["Value"] >= threshold]
if clusters:
clustered_cols = []
@@ -165,21 +247,15 @@ def _plot_heatmap(self, clusters=[], subschemas=[], threshold=0.5):
heatmap_rec_list["Recommendation"].isin(subschema_rec_cols)
]
- base = (
- alt.Chart(heatmap_rec_list)
- .mark_rect()
- .encode(
- y=alt.X("Column:O", sort=None),
- x=alt.X(f"Recommendation:O", sort=None),
- color="Value:Q",
- tooltip=[
- alt.Tooltip("Column", title="Column"),
- alt.Tooltip("Recommendation", title="Recommendation"),
- alt.Tooltip("Value", title="Value"),
- ],
- )
+ heatmap_pane = self._plot_heatmap_base(heatmap_rec_list)
+ return pn.Column(
+ heatmap_pane,
+ pn.bind(
+ self._plot_selected_row,
+ heatmap_rec_list,
+ heatmap_pane.selection.param.single,
+ ),
)
- return pn.pane.Vega(base)
def plot_heatmap(self):
select_cluster = pn.widgets.MultiChoice(
@@ -192,8 +268,26 @@ def plot_heatmap(self):
name="Threshold", start=0, end=1.0, step=0.01, value=0.5, width=220
)
+ acc_button = pn.widgets.Button(name="Accept Match", button_type="success")
+
+ rej_button = pn.widgets.Button(name="Decline Match", button_type="danger")
+
+ def on_click_accept_match(event):
+ self._accept_match()
+
+ def on_click_reject_match(event):
+ self._reject_match()
+
+ acc_button.on_click(on_click_accept_match)
+ rej_button.on_click(on_click_reject_match)
+
heatmap_bind = pn.bind(
- self._plot_heatmap, select_cluster, select_rec_groups, thresh_slider
+ self._plot_pane,
+ select_cluster,
+ select_rec_groups,
+ thresh_slider,
+ acc_button.param.clicks,
+ rej_button.param.clicks,
)
column_left = pn.Column(
@@ -201,6 +295,8 @@ def plot_heatmap(self):
select_cluster,
select_rec_groups,
thresh_slider,
+ acc_button,
+ rej_button,
styles=dict(background="WhiteSmoke"),
)
diff --git a/examples/scope_reducing_heatmap.ipynb b/examples/scope_reducing_heatmap.ipynb
index 21e32e48..1a101b53 100644
--- a/examples/scope_reducing_heatmap.ipynb
+++ b/examples/scope_reducing_heatmap.ipynb
@@ -9,7 +9,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -23,7 +23,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -70,12 +70,12 @@
"data": {
"application/vnd.holoviews_exec.v0+json": "",
"text/html": [
- "
\n",
- "
\n",
+ "
\n",
""
]
},
+ "metadata": {
+ "application/vnd.holoviews_exec.v0+json": {
+ "id": "aa16b927-ebd1-45d1-a966-2d079e223c93"
+ }
+ },
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n var py_version = '3.4.1'.replace('rc', '-rc.').replace('.dev', '-dev.');\n var reloading = true;\n var Bokeh = root.Bokeh;\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports = {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n run_callbacks();\n return null;\n }\n if (!reloading) {\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n var skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {'mathjax': '//cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS_HTML', 'vega-embed': 'https://cdn.jsdelivr.net/npm/vega-embed@6/build/vega-embed.min', 'vega-lite': 'https://cdn.jsdelivr.net/npm/vega-lite@5/build/vega-lite.min', 'vega': 
'https://cdn.jsdelivr.net/npm/vega@5/build/vega.min'}, 'shim': {'mathjax': {'exports': 'MathJax'}}});\n require([\"mathjax\"], function() {\n\ton_load()\n })\n require([\"vega-embed\"], function(vegaEmbed) {\n\twindow.vegaEmbed = vegaEmbed\n\ton_load()\n })\n require([\"vega-lite\"], function(vl) {\n\twindow.vl = vl\n\ton_load()\n })\n require([\"vega\"], function(vega) {\n\twindow.vega = vega\n\ton_load()\n })\n root._bokeh_is_loading = css_urls.length + 4;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n var existing_stylesheets = []\n var links = document.getElementsByTagName('link')\n for (var i = 0; i < links.length; i++) {\n var link = links[i]\n if (link.href != null) {\n\texisting_stylesheets.push(link.href)\n }\n }\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n if (existing_stylesheets.indexOf(url) !== -1) {\n\ton_load()\n\tcontinue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n } if (((window.MathJax !== undefined) && (!(window.MathJax instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-MML-AM_CHTML'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window.vega !== undefined) && (!(window.vega instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.4.2/dist/bundled/vegaplot/vega@5'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window.vegaLite !== undefined) && (!(window.vegaLite instanceof HTMLElement))) || window.requirejs) {\n var urls = 
['https://cdn.holoviz.org/panel/1.4.2/dist/bundled/vegaplot/vega-lite@5'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window.vegaEmbed !== undefined) && (!(window.vegaEmbed instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.4.2/dist/bundled/vegaplot/vega-embed@6'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } var existing_scripts = []\n var scripts = document.getElementsByTagName('script')\n for (var i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if (script.src != null) {\n\texisting_scripts.push(script.src)\n }\n }\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (var i = 0; i < js_modules.length; i++) {\n var url = js_modules[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n var url = js_exports[name];\n if (skip.indexOf(url) >= 0 || root[name] != null) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script 
tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-MML-AM_CHTML\", \"https://cdn.holoviz.org/panel/1.4.2/dist/bundled/vegaplot/vega@5\", \"https://cdn.holoviz.org/panel/1.4.2/dist/bundled/vegaplot/vega-lite@5\", \"https://cdn.holoviz.org/panel/1.4.2/dist/bundled/vegaplot/vega-embed@6\"];\n var js_modules = [];\n var js_exports = {};\n var css_urls = [];\n var inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n\ttry {\n inline_js[i].call(root, root.Bokeh);\n\t} catch(e) {\n\t if (!reloading) {\n\t throw e;\n\t }\n\t}\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n\tvar NewBokeh = root.Bokeh;\n\tif (Bokeh.versions === undefined) {\n\t Bokeh.versions = new Map();\n\t}\n\tif (NewBokeh.version !== Bokeh.version) {\n\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n\t}\n\troot.Bokeh = Bokeh;\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent 
versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks = undefined;\n console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } else {\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n if (!reloading && !bokeh_loaded) {\n\troot.Bokeh = undefined;\n }\n load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n\trun_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));",
+ "application/vnd.holoviews_load.v0+json": ""
+ },
"metadata": {},
"output_type": "display_data"
},
{
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "Tumor_Focality:\n"
- ]
+ "data": {
+ "application/javascript": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n console.log(message)\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in 
window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n comm.open();\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || {comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n }) \n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = 
toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", 
\"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n 
}\n } catch(err) {\n }\n}\n",
+ "application/vnd.holoviews_load.v0+json": ""
+ },
+ "metadata": {},
+ "output_type": "display_data"
},
{
"data": {
"text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Candidate | \n",
- " Similarity | \n",
- " Description | \n",
- " Values (sample) | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " tumor_focality | \n",
- " 0.808 | \n",
- " The text term used to describe whether the patient's disease originated in a single location or multiple locations. | \n",
- " Multifocal, Unifocal, Unknown, Not Reported | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " tumor_shape | \n",
- " 0.5626 | \n",
- " Text term to represent the description of the shape of a tumor determined by clinical or pathological techniques. | \n",
- " Diffuse, Dome, Mushroom, Unknown | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " tumor_depth_descriptor | \n",
- " 0.5311 | \n",
- " Text term for the degree to which a tumor has penetrated into organ or tissue. | \n",
- " Deep, Superficial, Not Reported | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " enneking_msts_tumor_site | \n",
- " 0.4829 | \n",
- " Text term and code that represents the tumor site of the musculoskeletal sarcoma, using the Enneking staging system approved by the Musculoskeletal Tu... | \n",
- " Extracompartmental (T2), Intracompartmental (T1), Unknown, Not Reported | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " biospecimen_type | \n",
- " 0.4809 | \n",
- " The text term used to describe the biological material used for testing, diagnostic, treatment or research purposes. | \n",
- " Blood, Bone Marrow, Buccal Mucosa, Buffy Coat, Cerebrospinal Fluid, Connective Tissue, Embryonic Fluid, Embryonic Tissue, Feces, Granulocyte, Involved... | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " tissue_type | \n",
- " 0.4783 | \n",
- " Text term that represents a description of the kind of tissue collected with respect to disease status or proximity to tumor tissue. | \n",
- " Abnormal, Normal, Peritumoral, Tumor, Unknown, Not Reported, Not Allowed To Collect | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " wilms_tumor_histologic_subtype | \n",
- " 0.4597 | \n",
- " The text term used to describe the classification of Wilms tumors distinguishing between favorable and unfavorable histologic groups. | \n",
- " Favorable, Unfavorable, Unknown, Not Reported | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " residual_tumor_measurement | \n",
- " 0.4544 | \n",
- " A measurement of the tumor cells that remain in the body following cancer treatment. | \n",
- " 1-10 mm, 11-20 mm, >20 mm, No macroscopic disease | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " non_nodal_tumor_deposits | \n",
- " 0.451 | \n",
- " The yes/no/unknown indicator used to describe the presence of tumor deposits in the pericolic or perirectal fat or in adjacent mesentery away from the... | \n",
- " Yes, No, Unknown, Not Reported | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " tumor_infiltrating_macrophages | \n",
- " 0.4464 | \n",
- " Non-neoplastic macrophages that infiltrate a tumor. | \n",
- " Few, Many, Moderate | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " specimen_type | \n",
- " 0.443 | \n",
- " The type of a material sample taken from a biological entity for testing, diagnostic, propagation, treatment or research purposes. This includes parti... | \n",
- " 2D Classical Conditionally Reprogrammed Cells, 2D Modified Conditionally Reprogrammed Cells, 3D Air-Liquid Interface Organoid, 3D Neurosphere, 3D Orga... | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " extranodal_extension | \n",
- " 0.4366 | \n",
- " Extension of a malignant neoplasm beyond the lymph node capsule. | \n",
- " Gross Extension, Microscopic Extension, No Extranodal Extension | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " measurement_unit | \n",
- " 0.4319 | \n",
- " The type of unit of measure being used to express a length or distance. | \n",
- " Centimeters, Millimeters | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " columnar_mucosa_present | \n",
- " 0.4153 | \n",
- " Indicator noting whether columnar mucosa was present within the tissue. | \n",
- " Yes, No, Unknown, Not Reported | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " distance_normal_to_tumor | \n",
- " 0.4077 | \n",
- " Text term to signify the distance between the tumor tissue and the normal control tissue that was procured for matching normal DNA. | \n",
- " Adjacent (< or = 2cm), Distal (>2cm), Unknown, Not Reported | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " morphologic_architectural_pattern | \n",
- " 0.3949 | \n",
- " A specific morphologic or pathologic architectural pattern was discovered within the sample studied. | \n",
- " Cohesive, Cribiform, Micropapillary, Non-cohesive, Papillary Renal Cell, Papillary, NOS, Solid, Tubular | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " vascular_invasion_type | \n",
- " 0.3896 | \n",
- " Text term that represents the type of vascular tumor invasion. | \n",
- " Extramural, Intramural, Macro, Micro, No Vascular Invasion, Unknown, Not Reported | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " residual_tumor | \n",
- " 0.3751 | \n",
- " Tumor cells that remain in the body following cancer treatment. | \n",
- " R0, R1, R2, RX | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " enneking_msts_metastasis | \n",
- " 0.3703 | \n",
- " Text term and code that represents the metastatic stage of the musculoskeletal sarcoma, using the Enneking staging system approved by the Musculoskele... | \n",
- " No Metastasis (M0), Regional or Distant Metastasis (M1), Unknown, Not Reported | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " ovarian_specimen_status | \n",
- " 0.3582 | \n",
- " The text term used to describe the physical condition of the involved ovary. | \n",
- " Ovarian Capsule Fragmented, Ovarian Capsule Intact, Ovarian Capsule Ruptured, Unknown, Not Reported | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Candidate Similarity \\\n",
- "0 tumor_focality 0.808 \n",
- "1 tumor_shape 0.5626 \n",
- "2 tumor_depth_descriptor 0.5311 \n",
- "3 enneking_msts_tumor_site 0.4829 \n",
- "4 biospecimen_type 0.4809 \n",
- "5 tissue_type 0.4783 \n",
- "6 wilms_tumor_histologic_subtype 0.4597 \n",
- "7 residual_tumor_measurement 0.4544 \n",
- "8 non_nodal_tumor_deposits 0.451 \n",
- "9 tumor_infiltrating_macrophages 0.4464 \n",
- "10 specimen_type 0.443 \n",
- "11 extranodal_extension 0.4366 \n",
- "12 measurement_unit 0.4319 \n",
- "13 columnar_mucosa_present 0.4153 \n",
- "14 distance_normal_to_tumor 0.4077 \n",
- "15 morphologic_architectural_pattern 0.3949 \n",
- "16 vascular_invasion_type 0.3896 \n",
- "17 residual_tumor 0.3751 \n",
- "18 enneking_msts_metastasis 0.3703 \n",
- "19 ovarian_specimen_status 0.3582 \n",
- "\n",
- " Description \\\n",
- "0 The text term used to describe whether the patient's disease originated in a single location or multiple locations. \n",
- "1 Text term to represent the description of the shape of a tumor determined by clinical or pathological techniques. \n",
- "2 Text term for the degree to which a tumor has penetrated into organ or tissue. \n",
- "3 Text term and code that represents the tumor site of the musculoskeletal sarcoma, using the Enneking staging system approved by the Musculoskeletal Tu... \n",
- "4 The text term used to describe the biological material used for testing, diagnostic, treatment or research purposes. \n",
- "5 Text term that represents a description of the kind of tissue collected with respect to disease status or proximity to tumor tissue. \n",
- "6 The text term used to describe the classification of Wilms tumors distinguishing between favorable and unfavorable histologic groups. \n",
- "7 A measurement of the tumor cells that remain in the body following cancer treatment. \n",
- "8 The yes/no/unknown indicator used to describe the presence of tumor deposits in the pericolic or perirectal fat or in adjacent mesentery away from the... \n",
- "9 Non-neoplastic macrophages that infiltrate a tumor. \n",
- "10 The type of a material sample taken from a biological entity for testing, diagnostic, propagation, treatment or research purposes. This includes parti... \n",
- "11 Extension of a malignant neoplasm beyond the lymph node capsule. \n",
- "12 The type of unit of measure being used to express a length or distance. \n",
- "13 Indicator noting whether columnar mucosa was present within the tissue. \n",
- "14 Text term to signify the distance between the tumor tissue and the normal control tissue that was procured for matching normal DNA. \n",
- "15 A specific morphologic or pathologic architectural pattern was discovered within the sample studied. \n",
- "16 Text term that represents the type of vascular tumor invasion. \n",
- "17 Tumor cells that remain in the body following cancer treatment. \n",
- "18 Text term and code that represents the metastatic stage of the musculoskeletal sarcoma, using the Enneking staging system approved by the Musculoskele... \n",
- "19 The text term used to describe the physical condition of the involved ovary. \n",
+ ""
]
},
"metadata": {},
"output_type": "display_data"
},
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "Tumor_Size_cm:\n"
- ]
- },
{
"data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Candidate | \n",
- " Similarity | \n",
- " Description | \n",
- " Values (sample) | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " shortest_dimension | \n",
- " 0.7575 | \n",
- " Numeric value that represents the shortest dimension of the sample, measured in millimeters. | \n",
- " | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " size_extraocular_nodule | \n",
- " 0.7416 | \n",
- " The size of the nodule that is outside the eye. | \n",
- " | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " tumor_width_measurement | \n",
- " 0.7034 | \n",
- " The numerical measurement of tumor width. | \n",
- " | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " tumor_depth_measurement | \n",
- " 0.6989 | \n",
- " The numerical measurement of tumor depth. | \n",
- " | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " tumor_thickness | \n",
- " 0.6506 | \n",
- " A measurement of the thickness of a sectioned slice (of tissue or mineral or other substance) in millimeters (mm). | \n",
- " | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " analyte_quantity | \n",
- " 0.6418 | \n",
- " The quantity in micrograms (ug) of the analyte(s) derived from the analyte(s) shipped for sequencing and characterization. | \n",
- " | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " average_insert_size | \n",
- " 0.6362 | \n",
- " Average insert size collected from samtools. | \n",
- " | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " tumor_largest_dimension_diameter | \n",
- " 0.5736 | \n",
- " Numeric value used to describe the maximum diameter or dimension of the primary tumor, measured in centimeters. | \n",
- " | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " mitotic_total_area | \n",
- " 0.5635 | \n",
- " The total area reviewed when calculating the mitotic index ratio. | \n",
- " | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " rin | \n",
- " 0.5578 | \n",
- " A numerical assessment of the integrity of RNA based on the entire electrophoretic trace of the RNA sample including the presence or absence of degrad... | \n",
- " | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " size_selection_range | \n",
- " 0.5574 | \n",
- " Range of size selection. | \n",
- " | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " imaging_suv | \n",
- " 0.5519 | \n",
- " The standardized update value (SUV) is the effectively dimensionless measure of regional tracer uptake calculated as the activity concentration within... | \n",
- " | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " tumor_infiltrating_macrophages | \n",
- " 0.5417 | \n",
- " Non-neoplastic macrophages that infiltrate a tumor. | \n",
- " Few, Many, Moderate | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " breslow_thickness | \n",
- " 0.5358 | \n",
- " The number that describes the distance, in millimeters, between the upper layer of the epidermis and the deepest point of tumor penetration. | \n",
- " | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " circumferential_resection_margin | \n",
- " 0.5312 | \n",
- " Numeric value used to describe the non-peritonealised bare area of rectum, comprising anterior and posterior segments, when submitted as a surgical sp... | \n",
- " | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " body_surface_area | \n",
- " 0.5154 | \n",
- " Numeric value used to represent the 2-dimensional extent of the body surface relating height to weight. | \n",
- " | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " workflow_version | \n",
- " 0.5089 | \n",
- " Major version for a GDC workflow. | \n",
- " | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " mean_coverage | \n",
- " 0.5042 | \n",
- " Mean coverage for whole genome sequencing, or mean target coverage for whole exome and targeted sequencing, collected from Picard Tools. | \n",
- " | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " tumor_depth | \n",
- " 0.4949 | \n",
- " Numeric value that represents the depth of tumor invasion, measured in millimeters (mm). | \n",
- " | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " tumor_depth_descriptor | \n",
- " 0.4846 | \n",
- " Text term for the degree to which a tumor has penetrated into organ or tissue. | \n",
- " Deep, Superficial, Not Reported | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "5a1f31b2ebc24a6ab8953e084f9f00ab",
+ "version_major": 2,
+ "version_minor": 0
+ },
"text/plain": [
- " Candidate Similarity \\\n",
- "0 shortest_dimension 0.7575 \n",
- "1 size_extraocular_nodule 0.7416 \n",
- "2 tumor_width_measurement 0.7034 \n",
- "3 tumor_depth_measurement 0.6989 \n",
- "4 tumor_thickness 0.6506 \n",
- "5 analyte_quantity 0.6418 \n",
- "6 average_insert_size 0.6362 \n",
- "7 tumor_largest_dimension_diameter 0.5736 \n",
- "8 mitotic_total_area 0.5635 \n",
- "9 rin 0.5578 \n",
- "10 size_selection_range 0.5574 \n",
- "11 imaging_suv 0.5519 \n",
- "12 tumor_infiltrating_macrophages 0.5417 \n",
- "13 breslow_thickness 0.5358 \n",
- "14 circumferential_resection_margin 0.5312 \n",
- "15 body_surface_area 0.5154 \n",
- "16 workflow_version 0.5089 \n",
- "17 mean_coverage 0.5042 \n",
- "18 tumor_depth 0.4949 \n",
- "19 tumor_depth_descriptor 0.4846 \n",
- "\n",
- " Description \\\n",
- "0 Numeric value that represents the shortest dimension of the sample, measured in millimeters. \n",
- "1 The size of the nodule that is outside the eye. \n",
- "2 The numerical measurement of tumor width. \n",
- "3 The numerical measurement of tumor depth. \n",
- "4 A measurement of the thickness of a sectioned slice (of tissue or mineral or other substance) in millimeters (mm). \n",
- "5 The quantity in micrograms (ug) of the analyte(s) derived from the analyte(s) shipped for sequencing and characterization. \n",
- "6 Average insert size collected from samtools. \n",
- "7 Numeric value used to describe the maximum diameter or dimension of the primary tumor, measured in centimeters. \n",
- "8 The total area reviewed when calculating the mitotic index ratio. \n",
- "9 A numerical assessment of the integrity of RNA based on the entire electrophoretic trace of the RNA sample including the presence or absence of degrad... \n",
- "10 Range of size selection. \n",
- "11 The standardized update value (SUV) is the effectively dimensionless measure of regional tracer uptake calculated as the activity concentration within... \n",
- "12 Non-neoplastic macrophages that infiltrate a tumor. \n",
- "13 The number that describes the distance, in millimeters, between the upper layer of the epidermis and the deepest point of tumor penetration. \n",
- "14 Numeric value used to describe the non-peritonealised bare area of rectum, comprising anterior and posterior segments, when submitted as a surgical sp... \n",
- "15 Numeric value used to represent the 2-dimensional extent of the body surface relating height to weight. \n",
- "16 Major version for a GDC workflow. \n",
- "17 Mean coverage for whole genome sequencing, or mean target coverage for whole exome and targeted sequencing, collected from Picard Tools. \n",
- "18 Numeric value that represents the depth of tumor invasion, measured in millimeters (mm). \n",
- "19 Text term for the degree to which a tumor has penetrated into organ or tissue. \n",
- "\n",
- " Values (sample) \n",
- "0 \n",
- "1 \n",
- "2 \n",
- "3 \n",
- "4 \n",
- "5 \n",
- "6 \n",
- "7 \n",
- "8 \n",
- "9 \n",
- "10 \n",
- "11 \n",
- "12 Few, Many, Moderate \n",
- "13 \n",
- "14 \n",
- "15 \n",
- "16 \n",
- "17 \n",
- "18 \n",
- "19 Deep, Superficial, Not Reported "
+ "BokehModel(combine_events=True, render_bundle={'docs_json': {'0bf58594-a7b6-4b36-9048-2264a754d6d6': {'version…"
]
},
+ "execution_count": 4,
"metadata": {},
- "output_type": "display_data"
+ "output_type": "execute_result"
}
],
"source": [
- "\n",
- "manager = APIManager()\n",
- "manager.load_dataset('./datasets/dou.csv')\n",
- "reduced_scope = manager.reduce_scope()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [],
- "source": [
- "from bdi.visualization.scope_reducing import SRHeatMapManager\n",
+ "from bdikit.visualization.scope_reducing import SRHeatMapManager\n",
"\n",
"heatmap_manager = SRHeatMapManager()\n",
- "heatmap_manager.get_heatmap(reduced_scope)\n",
+ "heatmap_manager._write_json(reduced_scope)\n",
+ "heatmap_manager.get_heatmap()\n",
"heatmap_manager.plot_heatmap()"
]
}