diff --git a/bdikit/visualization/scope_reducing.py b/bdikit/visualization/scope_reducing.py index 64ba9765..0256d240 100644 --- a/bdikit/visualization/scope_reducing.py +++ b/bdikit/visualization/scope_reducing.py @@ -1,4 +1,5 @@ import json +import logging import altair as alt import numpy as np @@ -11,6 +12,8 @@ pn.extension("mathjax") pn.extension("vega") +logger = logging.getLogger(__name__) + class SRHeatMapManager: def __init__(self) -> None: @@ -23,6 +26,9 @@ def __init__(self) -> None: self.rec_cols_gdc = None self.clusters = None + # Selected column + self.selected_row = None + def _load_json(self): with open(self.json_path) as f: data = json.load(f) @@ -32,7 +38,8 @@ def _write_json(self, data): with open(self.json_path, "w") as f: json.dump(data, f) - def get_heatmap(self, recommendations): + def get_heatmap(self): + recommendations = self._load_json() rec_cols = set() rec_table = [] rec_list = [] @@ -124,6 +131,48 @@ def _get_column_values(self, properties): else: return None + def _accept_match(self, col_name=None, match_name=None): + if self.selected_row is None: + return + col_name = self.selected_row["Column"].values[0] + match_name = self.selected_row["Recommendation"].values[0] + recommendations = self._load_json() + for idx, d in enumerate(recommendations): + candidate_name = d["Candidate column"] + if candidate_name != col_name: + continue + for top_k_name, top_k_score in d["Top k columns"]: + if top_k_name == match_name: + recommendations[idx] = { + "Candidate column": candidate_name, + "Top k columns": [[top_k_name, top_k_score]], + } + self._write_json(recommendations) + self.get_heatmap() + return + + def _reject_match(self): + if self.selected_row is None: + return + col_name = self.selected_row["Column"].values[0] + match_name = self.selected_row["Recommendation"].values[0] + recommendations = self._load_json() + for idx, d in enumerate(recommendations): + candidate_name = d["Candidate column"] + if candidate_name != col_name: + continue + new_top_k = [] + for top_k_name, top_k_score in d["Top k columns"]: + if top_k_name != match_name: + new_top_k.append([top_k_name, top_k_score]) + recommendations[idx] = { + "Candidate column": candidate_name, + "Top k columns": new_top_k, + } + self._write_json(recommendations) + self.get_heatmap() + return + def get_clusters(self): words = self.rec_table_df["Column"].to_numpy() lev_similarity = -1 * np.array( @@ -136,19 +185,52 @@ def get_clusters(self): ) affprop.fit(lev_similarity) - print(f"Number of clusters: {np.unique(affprop.labels_).shape[0]}\n") + logger.debug(f"Number of clusters: {np.unique(affprop.labels_).shape[0]}\n") cluster_names = [] clusters = {} for cluster_id in np.unique(affprop.labels_): exemplar = words[affprop.cluster_centers_indices_[cluster_id]] cluster = np.unique(words[np.nonzero(affprop.labels_ == cluster_id)]) cluster_str = ", ".join(cluster) - print(" - *%s:* %s" % (exemplar, cluster_str)) + logger.debug(" - *%s:* %s" % (exemplar, cluster_str)) cluster_names.append(exemplar) clusters[exemplar] = cluster self.clusters = clusters - def _plot_heatmap(self, clusters=[], subschemas=[], threshold=0.5): + def _plot_heatmap_base(self, heatmap_rec_list): + single = alt.selection_point(name="single") + base = ( + alt.Chart(heatmap_rec_list) + .mark_rect(size=100) + .encode( + y=alt.X("Column:O", sort=None), + x=alt.X(f"Recommendation:O", sort=None), + color=alt.condition(single, "Value:Q", alt.value("lightgray")), + # color="Value:Q", + tooltip=[ + alt.Tooltip("Column", title="Column"), + alt.Tooltip("Recommendation", title="Recommendation"), + alt.Tooltip("Value", title="Value"), + ], + ) + .add_params(single) + ) + return pn.pane.Vega(base) + + def _plot_selected_row(self, heatmap_rec_list, selection): + if not selection: + return "## No selection" + selected_row = heatmap_rec_list.iloc[selection] + column = selected_row["Column"].values[0] + rec = selected_row["Recommendation"].values[0] + # value = selected_row["Value"] + # self._accept_match(column, rec) + self.selected_row = selected_row + return pn.widgets.DataFrame(selected_row) + + def _plot_pane( + self, clusters=[], subschemas=[], threshold=0.5, acc_click=0, rej_click=0 + ): heatmap_rec_list = self.rec_list_df[self.rec_list_df["Value"] >= threshold] if clusters: clustered_cols = [] @@ -165,21 +247,15 @@ def _plot_heatmap(self, clusters=[], subschemas=[], threshold=0.5): heatmap_rec_list["Recommendation"].isin(subschema_rec_cols) ] - base = ( - alt.Chart(heatmap_rec_list) - .mark_rect() - .encode( - y=alt.X("Column:O", sort=None), - x=alt.X(f"Recommendation:O", sort=None), - color="Value:Q", - tooltip=[ - alt.Tooltip("Column", title="Column"), - alt.Tooltip("Recommendation", title="Recommendation"), - alt.Tooltip("Value", title="Value"), - ], - ) + heatmap_pane = self._plot_heatmap_base(heatmap_rec_list) + return pn.Column( + heatmap_pane, + pn.bind( + self._plot_selected_row, + heatmap_rec_list, + heatmap_pane.selection.param.single, + ), ) - return pn.pane.Vega(base) def plot_heatmap(self): select_cluster = pn.widgets.MultiChoice( @@ -192,8 +268,26 @@ def plot_heatmap(self): name="Threshold", start=0, end=1.0, step=0.01, value=0.5, width=220 ) + acc_button = pn.widgets.Button(name="Accept Match", button_type="success") + + rej_button = pn.widgets.Button(name="Decline Match", button_type="danger") + + def on_click_accept_match(event): + self._accept_match() + + def on_click_reject_match(event): + self._reject_match() + + acc_button.on_click(on_click_accept_match) + rej_button.on_click(on_click_reject_match) + heatmap_bind = pn.bind( - self._plot_heatmap, select_cluster, select_rec_groups, thresh_slider + self._plot_pane, + select_cluster, + select_rec_groups, + thresh_slider, + acc_button.param.clicks, + rej_button.param.clicks, ) column_left = pn.Column( @@ -201,6 +295,8 @@ def plot_heatmap(self): select_cluster, select_rec_groups, thresh_slider, + acc_button, + rej_button, styles=dict(background="WhiteSmoke"), ) diff --git a/examples/scope_reducing_heatmap.ipynb b/examples/scope_reducing_heatmap.ipynb index 21e32e48..1a101b53 100644 --- a/examples/scope_reducing_heatmap.ipynb +++ b/examples/scope_reducing_heatmap.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -23,7 +23,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -70,12 +70,12 @@ "data": { "application/vnd.holoviews_exec.v0+json": "", "text/html": [ - "
\n", - "
\n", + "
\n", + "
\n", "
\n", "" ] }, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "aa16b927-ebd1-45d1-a966-2d079e223c93" + } + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n var py_version = '3.4.1'.replace('rc', '-rc.').replace('.dev', '-dev.');\n var reloading = true;\n var Bokeh = root.Bokeh;\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports = {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n run_callbacks();\n return null;\n }\n if (!reloading) {\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n var skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {'mathjax': '//cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS_HTML', 'vega-embed': 'https://cdn.jsdelivr.net/npm/vega-embed@6/build/vega-embed.min', 'vega-lite': 'https://cdn.jsdelivr.net/npm/vega-lite@5/build/vega-lite.min', 'vega': 'https://cdn.jsdelivr.net/npm/vega@5/build/vega.min'}, 'shim': {'mathjax': {'exports': 'MathJax'}}});\n require([\"mathjax\"], function() {\n\ton_load()\n })\n require([\"vega-embed\"], function(vegaEmbed) {\n\twindow.vegaEmbed = vegaEmbed\n\ton_load()\n })\n require([\"vega-lite\"], function(vl) {\n\twindow.vl = vl\n\ton_load()\n })\n require([\"vega\"], function(vega) {\n\twindow.vega = vega\n\ton_load()\n })\n root._bokeh_is_loading = css_urls.length + 4;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n var existing_stylesheets = []\n var links = document.getElementsByTagName('link')\n for (var i = 0; i < links.length; i++) {\n var link = links[i]\n if (link.href != null) {\n\texisting_stylesheets.push(link.href)\n }\n }\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n if (existing_stylesheets.indexOf(url) !== -1) {\n\ton_load()\n\tcontinue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n } if (((window.MathJax !== undefined) && (!(window.MathJax instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-MML-AM_CHTML'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window.vega !== undefined) && (!(window.vega instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.4.2/dist/bundled/vegaplot/vega@5'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window.vegaLite !== undefined) && (!(window.vegaLite instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.4.2/dist/bundled/vegaplot/vega-lite@5'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window.vegaEmbed !== undefined) && (!(window.vegaEmbed instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/1.4.2/dist/bundled/vegaplot/vega-embed@6'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } var existing_scripts = []\n var scripts = document.getElementsByTagName('script')\n for (var i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if (script.src != null) {\n\texisting_scripts.push(script.src)\n }\n }\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (var i = 0; i < js_modules.length; i++) {\n var url = js_modules[i];\n if (skip.indexOf(url) !== -1 || existing_scripts.indexOf(url) !== -1) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n var url = js_exports[name];\n if (skip.indexOf(url) >= 0 || root[name] != null) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-MML-AM_CHTML\", \"https://cdn.holoviz.org/panel/1.4.2/dist/bundled/vegaplot/vega@5\", \"https://cdn.holoviz.org/panel/1.4.2/dist/bundled/vegaplot/vega-lite@5\", \"https://cdn.holoviz.org/panel/1.4.2/dist/bundled/vegaplot/vega-embed@6\"];\n var js_modules = [];\n var js_exports = {};\n var css_urls = [];\n var inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n\ttry {\n inline_js[i].call(root, root.Bokeh);\n\t} catch(e) {\n\t if (!reloading) {\n\t throw e;\n\t }\n\t}\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n\tvar NewBokeh = root.Bokeh;\n\tif (Bokeh.versions === undefined) {\n\t Bokeh.versions = new Map();\n\t}\n\tif (NewBokeh.version !== Bokeh.version) {\n\t Bokeh.versions.set(NewBokeh.version, NewBokeh)\n\t}\n\troot.Bokeh = Bokeh;\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks = undefined;\n console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } else {\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n if (!reloading && !bokeh_loaded) {\n\troot.Bokeh = undefined;\n }\n load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n\tconsole.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n\trun_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));", + "application/vnd.holoviews_load.v0+json": "" + }, "metadata": {}, "output_type": "display_data" }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Tumor_Focality:\n" - ] + "data": { + "application/javascript": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n console.log(message)\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n comm.open();\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || {comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n }) \n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n } catch(err) {\n }\n}\n", + "application/vnd.holoviews_load.v0+json": "" + }, + "metadata": {}, + "output_type": "display_data" }, { "data": { "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
CandidateSimilarityDescriptionValues (sample)
0tumor_focality0.808The text term used to describe whether the patient's disease originated in a single location or multiple locations.Multifocal, Unifocal, Unknown, Not Reported
1tumor_shape0.5626Text term to represent the description of the shape of a tumor determined by clinical or pathological techniques.Diffuse, Dome, Mushroom, Unknown
2tumor_depth_descriptor0.5311Text term for the degree to which a tumor has penetrated into organ or tissue.Deep, Superficial, Not Reported
3enneking_msts_tumor_site0.4829Text term and code that represents the tumor site of the musculoskeletal sarcoma, using the Enneking staging system approved by the Musculoskeletal Tu...Extracompartmental (T2), Intracompartmental (T1), Unknown, Not Reported
4biospecimen_type0.4809The text term used to describe the biological material used for testing, diagnostic, treatment or research purposes.Blood, Bone Marrow, Buccal Mucosa, Buffy Coat, Cerebrospinal Fluid, Connective Tissue, Embryonic Fluid, Embryonic Tissue, Feces, Granulocyte, Involved...
5tissue_type0.4783Text term that represents a description of the kind of tissue collected with respect to disease status or proximity to tumor tissue.Abnormal, Normal, Peritumoral, Tumor, Unknown, Not Reported, Not Allowed To Collect
6wilms_tumor_histologic_subtype0.4597The text term used to describe the classification of Wilms tumors distinguishing between favorable and unfavorable histologic groups.Favorable, Unfavorable, Unknown, Not Reported
7residual_tumor_measurement0.4544A measurement of the tumor cells that remain in the body following cancer treatment.1-10 mm, 11-20 mm, >20 mm, No macroscopic disease
8non_nodal_tumor_deposits0.451The yes/no/unknown indicator used to describe the presence of tumor deposits in the pericolic or perirectal fat or in adjacent mesentery away from the...Yes, No, Unknown, Not Reported
9tumor_infiltrating_macrophages0.4464Non-neoplastic macrophages that infiltrate a tumor.Few, Many, Moderate
10specimen_type0.443The type of a material sample taken from a biological entity for testing, diagnostic, propagation, treatment or research purposes. This includes parti...2D Classical Conditionally Reprogrammed Cells, 2D Modified Conditionally Reprogrammed Cells, 3D Air-Liquid Interface Organoid, 3D Neurosphere, 3D Orga...
11extranodal_extension0.4366Extension of a malignant neoplasm beyond the lymph node capsule.Gross Extension, Microscopic Extension, No Extranodal Extension
12measurement_unit0.4319The type of unit of measure being used to express a length or distance.Centimeters, Millimeters
13columnar_mucosa_present0.4153Indicator noting whether columnar mucosa was present within the tissue.Yes, No, Unknown, Not Reported
14distance_normal_to_tumor0.4077Text term to signify the distance between the tumor tissue and the normal control tissue that was procured for matching normal DNA.Adjacent (< or = 2cm), Distal (>2cm), Unknown, Not Reported
15morphologic_architectural_pattern0.3949A specific morphologic or pathologic architectural pattern was discovered within the sample studied.Cohesive, Cribiform, Micropapillary, Non-cohesive, Papillary Renal Cell, Papillary, NOS, Solid, Tubular
16vascular_invasion_type0.3896Text term that represents the type of vascular tumor invasion.Extramural, Intramural, Macro, Micro, No Vascular Invasion, Unknown, Not Reported
17residual_tumor0.3751Tumor cells that remain in the body following cancer treatment.R0, R1, R2, RX
18enneking_msts_metastasis0.3703Text term and code that represents the metastatic stage of the musculoskeletal sarcoma, using the Enneking staging system approved by the Musculoskele...No Metastasis (M0), Regional or Distant Metastasis (M1), Unknown, Not Reported
19ovarian_specimen_status0.3582The text term used to describe the physical condition of the involved ovary.Ovarian Capsule Fragmented, Ovarian Capsule Intact, Ovarian Capsule Ruptured, Unknown, Not Reported
\n", - "
" - ], - "text/plain": [ - " Candidate Similarity \\\n", - "0 tumor_focality 0.808 \n", - "1 tumor_shape 0.5626 \n", - "2 tumor_depth_descriptor 0.5311 \n", - "3 enneking_msts_tumor_site 0.4829 \n", - "4 biospecimen_type 0.4809 \n", - "5 tissue_type 0.4783 \n", - "6 wilms_tumor_histologic_subtype 0.4597 \n", - "7 residual_tumor_measurement 0.4544 \n", - "8 non_nodal_tumor_deposits 0.451 \n", - "9 tumor_infiltrating_macrophages 0.4464 \n", - "10 specimen_type 0.443 \n", - "11 extranodal_extension 0.4366 \n", - "12 measurement_unit 0.4319 \n", - "13 columnar_mucosa_present 0.4153 \n", - "14 distance_normal_to_tumor 0.4077 \n", - "15 morphologic_architectural_pattern 0.3949 \n", - "16 vascular_invasion_type 0.3896 \n", - "17 residual_tumor 0.3751 \n", - "18 enneking_msts_metastasis 0.3703 \n", - "19 ovarian_specimen_status 0.3582 \n", - "\n", - " Description \\\n", - "0 The text term used to describe whether the patient's disease originated in a single location or multiple locations. \n", - "1 Text term to represent the description of the shape of a tumor determined by clinical or pathological techniques. \n", - "2 Text term for the degree to which a tumor has penetrated into organ or tissue. \n", - "3 Text term and code that represents the tumor site of the musculoskeletal sarcoma, using the Enneking staging system approved by the Musculoskeletal Tu... \n", - "4 The text term used to describe the biological material used for testing, diagnostic, treatment or research purposes. \n", - "5 Text term that represents a description of the kind of tissue collected with respect to disease status or proximity to tumor tissue. \n", - "6 The text term used to describe the classification of Wilms tumors distinguishing between favorable and unfavorable histologic groups. \n", - "7 A measurement of the tumor cells that remain in the body following cancer treatment. \n", - "8 The yes/no/unknown indicator used to describe the presence of tumor deposits in the pericolic or perirectal fat or in adjacent mesentery away from the... \n", - "9 Non-neoplastic macrophages that infiltrate a tumor. \n", - "10 The type of a material sample taken from a biological entity for testing, diagnostic, propagation, treatment or research purposes. This includes parti... \n", - "11 Extension of a malignant neoplasm beyond the lymph node capsule. \n", - "12 The type of unit of measure being used to express a length or distance. \n", - "13 Indicator noting whether columnar mucosa was present within the tissue. \n", - "14 Text term to signify the distance between the tumor tissue and the normal control tissue that was procured for matching normal DNA. \n", - "15 A specific morphologic or pathologic architectural pattern was discovered within the sample studied. \n", - "16 Text term that represents the type of vascular tumor invasion. \n", - "17 Tumor cells that remain in the body following cancer treatment. \n", - "18 Text term and code that represents the metastatic stage of the musculoskeletal sarcoma, using the Enneking staging system approved by the Musculoskele... \n", - "19 The text term used to describe the physical condition of the involved ovary. \n", + "" ] }, "metadata": {}, "output_type": "display_data" }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Tumor_Size_cm:\n" - ] - }, { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
CandidateSimilarityDescriptionValues (sample)
0shortest_dimension0.7575Numeric value that represents the shortest dimension of the sample, measured in millimeters.
1size_extraocular_nodule0.7416The size of the nodule that is outside the eye.
2tumor_width_measurement0.7034The numerical measurement of tumor width.
3tumor_depth_measurement0.6989The numerical measurement of tumor depth.
4tumor_thickness0.6506A measurement of the thickness of a sectioned slice (of tissue or mineral or other substance) in millimeters (mm).
5analyte_quantity0.6418The quantity in micrograms (ug) of the analyte(s) derived from the analyte(s) shipped for sequencing and characterization.
6average_insert_size0.6362Average insert size collected from samtools.
7tumor_largest_dimension_diameter0.5736Numeric value used to describe the maximum diameter or dimension of the primary tumor, measured in centimeters.
8mitotic_total_area0.5635The total area reviewed when calculating the mitotic index ratio.
9rin0.5578A numerical assessment of the integrity of RNA based on the entire electrophoretic trace of the RNA sample including the presence or absence of degrad...
10size_selection_range0.5574Range of size selection.
11imaging_suv0.5519The standardized update value (SUV) is the effectively dimensionless measure of regional tracer uptake calculated as the activity concentration within...
12tumor_infiltrating_macrophages0.5417Non-neoplastic macrophages that infiltrate a tumor.Few, Many, Moderate
13breslow_thickness0.5358The number that describes the distance, in millimeters, between the upper layer of the epidermis and the deepest point of tumor penetration.
14circumferential_resection_margin0.5312Numeric value used to describe the non-peritonealised bare area of rectum, comprising anterior and posterior segments, when submitted as a surgical sp...
15body_surface_area0.5154Numeric value used to represent the 2-dimensional extent of the body surface relating height to weight.
16workflow_version0.5089Major version for a GDC workflow.
17mean_coverage0.5042Mean coverage for whole genome sequencing, or mean target coverage for whole exome and targeted sequencing, collected from Picard Tools.
18tumor_depth0.4949Numeric value that represents the depth of tumor invasion, measured in millimeters (mm).
19tumor_depth_descriptor0.4846Text term for the degree to which a tumor has penetrated into organ or tissue.Deep, Superficial, Not Reported
\n", - "
" - ], + "application/vnd.jupyter.widget-view+json": { + "model_id": "5a1f31b2ebc24a6ab8953e084f9f00ab", + "version_major": 2, + "version_minor": 0 + }, "text/plain": [ - " Candidate Similarity \\\n", - "0 shortest_dimension 0.7575 \n", - "1 size_extraocular_nodule 0.7416 \n", - "2 tumor_width_measurement 0.7034 \n", - "3 tumor_depth_measurement 0.6989 \n", - "4 tumor_thickness 0.6506 \n", - "5 analyte_quantity 0.6418 \n", - "6 average_insert_size 0.6362 \n", - "7 tumor_largest_dimension_diameter 0.5736 \n", - "8 mitotic_total_area 0.5635 \n", - "9 rin 0.5578 \n", - "10 size_selection_range 0.5574 \n", - "11 imaging_suv 0.5519 \n", - "12 tumor_infiltrating_macrophages 0.5417 \n", - "13 breslow_thickness 0.5358 \n", - "14 circumferential_resection_margin 0.5312 \n", - "15 body_surface_area 0.5154 \n", - "16 workflow_version 0.5089 \n", - "17 mean_coverage 0.5042 \n", - "18 tumor_depth 0.4949 \n", - "19 tumor_depth_descriptor 0.4846 \n", - "\n", - " Description \\\n", - "0 Numeric value that represents the shortest dimension of the sample, measured in millimeters. \n", - "1 The size of the nodule that is outside the eye. \n", - "2 The numerical measurement of tumor width. \n", - "3 The numerical measurement of tumor depth. \n", - "4 A measurement of the thickness of a sectioned slice (of tissue or mineral or other substance) in millimeters (mm). \n", - "5 The quantity in micrograms (ug) of the analyte(s) derived from the analyte(s) shipped for sequencing and characterization. \n", - "6 Average insert size collected from samtools. \n", - "7 Numeric value used to describe the maximum diameter or dimension of the primary tumor, measured in centimeters. \n", - "8 The total area reviewed when calculating the mitotic index ratio. \n", - "9 A numerical assessment of the integrity of RNA based on the entire electrophoretic trace of the RNA sample including the presence or absence of degrad... \n", - "10 Range of size selection. \n", - "11 The standardized update value (SUV) is the effectively dimensionless measure of regional tracer uptake calculated as the activity concentration within... \n", - "12 Non-neoplastic macrophages that infiltrate a tumor. \n", - "13 The number that describes the distance, in millimeters, between the upper layer of the epidermis and the deepest point of tumor penetration. \n", - "14 Numeric value used to describe the non-peritonealised bare area of rectum, comprising anterior and posterior segments, when submitted as a surgical sp... \n", - "15 Numeric value used to represent the 2-dimensional extent of the body surface relating height to weight. \n", - "16 Major version for a GDC workflow. \n", - "17 Mean coverage for whole genome sequencing, or mean target coverage for whole exome and targeted sequencing, collected from Picard Tools. \n", - "18 Numeric value that represents the depth of tumor invasion, measured in millimeters (mm). \n", - "19 Text term for the degree to which a tumor has penetrated into organ or tissue. \n", - "\n", - " Values (sample) \n", - "0 \n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", - "5 \n", - "6 \n", - "7 \n", - "8 \n", - "9 \n", - "10 \n", - "11 \n", - "12 Few, Many, Moderate \n", - "13 \n", - "14 \n", - "15 \n", - "16 \n", - "17 \n", - "18 \n", - "19 Deep, Superficial, Not Reported " + "BokehModel(combine_events=True, render_bundle={'docs_json': {'0bf58594-a7b6-4b36-9048-2264a754d6d6': {'version…" ] }, + "execution_count": 4, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ - "\n", - "manager = APIManager()\n", - "manager.load_dataset('./datasets/dou.csv')\n", - "reduced_scope = manager.reduce_scope()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from bdi.visualization.scope_reducing import SRHeatMapManager\n", + "from bdikit.visualization.scope_reducing import SRHeatMapManager\n", "\n", "heatmap_manager = SRHeatMapManager()\n", - "heatmap_manager.get_heatmap(reduced_scope)\n", + "heatmap_manager._write_json(reduced_scope)\n", + "heatmap_manager.get_heatmap()\n", "heatmap_manager.plot_heatmap()" ] }