From 01bcdfacc534d18a62dc0db5d73da36261017f23 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Tue, 8 Oct 2024 08:58:53 -0400 Subject: [PATCH 1/5] Add VegaFusionWidget based on AnyWidget --- .../vegafusion/jupyter/__init__.py | 246 ++++++++++++++++++ .../vegafusion/jupyter/js/index.js | 202 ++++++++++++++ 2 files changed, 448 insertions(+) create mode 100644 vegafusion-python/vegafusion/jupyter/__init__.py create mode 100644 vegafusion-python/vegafusion/jupyter/js/index.js diff --git a/vegafusion-python/vegafusion/jupyter/__init__.py b/vegafusion-python/vegafusion/jupyter/__init__.py new file mode 100644 index 00000000..08a898c0 --- /dev/null +++ b/vegafusion-python/vegafusion/jupyter/__init__.py @@ -0,0 +1,246 @@ +import json +import pathlib +from typing import Any +import anywidget +import traitlets +from vegafusion import runtime + +_here = pathlib.Path(__file__).parent + + +def load_js_src() -> str: + return (_here / "js" / "index.js").read_text() + + +class VegaFusionWidget(anywidget.AnyWidget): + _esm = load_js_src() + _css = r""" + .vega-embed { + /* Make sure action menu isn't cut off */ + overflow: visible; + } + """ + + # Public traitlets + spec = traitlets.Dict(allow_none=True) + transformed_spec = traitlets.Dict(allow_none=True).tag(sync=True) + inline_datasets = traitlets.Dict(default_value=None, allow_none=True) + debounce_wait = traitlets.Float(default_value=10).tag(sync=True) + max_wait = traitlets.Bool(default_value=True).tag(sync=True) + local_tz = traitlets.Unicode(default_value=None, allow_none=True).tag(sync=True) + embed_options = traitlets.Dict(default_value=None, allow_none=True).tag(sync=True) + debug = traitlets.Bool(default_value=False) + row_limit = traitlets.Int(default_value=100000).tag(sync=True) + + # Public output traitlets + warnings = traitlets.List(allow_none=True) + + # Internal comm traitlets for VegaFusion support + _js_watch_plan = traitlets.Any(allow_none=True).tag(sync=True) + _js_to_py_updates = traitlets.Any(allow_none=True).tag(sync=True) + _py_to_js_updates = traitlets.Any(allow_none=True).tag(sync=True) + + # Other internal state + _chart_state = traitlets.Any(allow_none=True) + + # Track whether widget is configured for offline use + _is_offline = False + + @classmethod + def enable_offline(cls, offline: bool = True): + """ + Configure VegaFusionWidget's offline behavior + + Parameters + ---------- + offline: bool + If True, configure VegaFusionWidget to operate in offline mode where JavaScript + dependencies are loaded from vl-convert. + If False, configure it to operate in online mode where JavaScript dependencies + are loaded from CDN dynamically. This is the default behavior. + """ + import vl_convert as vlc + + if offline: + if cls._is_offline: + # Already offline + return + + src_lines = load_js_src().split("\n") + + # Remove leading lines with only whitespace, comments, or imports + while src_lines and ( + len(src_lines[0].strip()) == 0 + or src_lines[0].startswith("import") + or src_lines[0].startswith("//") + ): + src_lines.pop(0) + + src = "\n".join(src_lines) + + # vl-convert's javascript_bundle function creates a self-contained JavaScript bundle + # for JavaScript snippets that import from a small set of dependencies that + # vl-convert includes. 
To see the available imports and their imported names, run + # import vl_convert as vlc + # help(vlc.javascript_bundle) + bundled_src = vlc.javascript_bundle(src) + cls._esm = bundled_src + cls._is_offline = True + else: + cls._esm = load_js_src() + cls._is_offline = False + + + def __init__( + self, + spec: dict, + inline_datasets: dict | None = None, + debounce_wait: int = 10, + max_wait: bool = True, + debug: bool = False, + embed_options: dict | None = None, + local_tz: str | None = None, + row_limit: int = 100000, + **kwargs: Any, + ): + """ + Jupyter Widget for displaying Vega chart specifications, using VegaFusion + for server-side scaling. + + Parameters + ---------- + spec: dict + Vega chart specification + inline_datasets: dict | None + Datasets referenced in the Vega spec in vegafusion+dataset:// URLs + debounce_wait: int + Debouncing wait time in milliseconds. Updates will be sent from the client to the kernel + after debounce_wait milliseconds of no chart interactions. + max_wait: bool + If True (default), updates will be sent from the client to the kernel every debounce_wait + milliseconds even if there are ongoing chart interactions. If False, updates will not be + sent until chart interactions have completed. + debug: bool + If True, debug messages will be printed + embed_options: dict + Options to pass to vega-embed. + See https://github.com/vega/vega-embed?tab=readme-ov-file#options + local_tz: str | None + Timezone to use for the chart. If None, the chart will use the browser's local timezone. + row_limit: int + Maximum number of rows to send to the browser, after VegaFusion has performed is transformations. + A RowLimitError will be raised if the VegaFusion operation results in more than row_limit rows. + """ + super().__init__( + spec=spec, + inline_datasets=inline_datasets, + debounce_wait=debounce_wait, + max_wait=max_wait, + debug=debug, + embed_options=embed_options, + local_tz=local_tz, + row_limit=row_limit, + **kwargs, + ) + self.on_msg(self._handle_custom_msg) + + @traitlets.observe("spec") + def _on_change_spec(self, change): + """ + Internal callback function that updates the widgets's internal + state when the Vega chart specification changes + """ + new_spec = change.new + + if new_spec is None: + # Clear state + with self.hold_sync(): + self.transformed_spec = None + self._chart_state = None + self._js_watch_plan = None + return + + if self.local_tz is None: + def on_local_tz_change(change): + self._init_chart_state(change["new"]) + + self.observe(on_local_tz_change, ["local_tz"]) + else: + self._init_chart_state(self.local_tz) + + @traitlets.observe("inline_datasets") + def _on_change_inline_datasets(self, change): + """ + Internal callback function that updates the widgets's internal + state when the inline datasets change + """ + self._init_chart_state(self.local_tz) + + def _handle_custom_msg(self, content, buffers): + if content.get('type') == 'update_state': + self._handle_update_state(content.get('updates', [])) + + def _handle_update_state(self, updates): + """ + Handle the 'update_state' message from JavaScript + """ + if self.debug: + print(f"Received update_state message from JavaScript:\n{updates}") + + # Process the updates using the chart state + if self._chart_state is not None: + processed_updates = self._chart_state.update(updates) + + if self.debug: + print(f"Processed updates:\n{processed_updates}") + + # Send the processed updates back to JavaScript + self.send({"type": "update_view", "updates": processed_updates}) + else: + 
print("Warning: Received update_state message, but chart state is not initialized.") + + + def _init_chart_state(self, local_tz: str): + if self.spec is not None: + with self.hold_sync(): + + # Build the chart state + self._chart_state = runtime.new_chart_state( + self.spec, + local_tz=local_tz, + inline_datasets=self.inline_datasets, + row_limit=self.row_limit, + ) + + # Check if the row limit was exceeded + handle_row_limit_exceeded(self.row_limit, self._chart_state.get_warnings()) + + # Get the watch plan and transformed spec + self._js_watch_plan = self._chart_state.get_watch_plan()[ + "client_to_server" + ] + self.transformed_spec = self._chart_state.get_transformed_spec() + self.warnings = self._chart_state.get_warnings() + + +def handle_row_limit_exceeded(row_limit: int, warnings: list): + for warning in warnings: + if warning.get("type") == "RowLimitExceeded": + msg = ( + "The number of dataset rows after filtering and aggregation exceeds\n" + f"the current limit of {row_limit}. Try adding an aggregation to reduce\n" + "the size of the dataset that must be loaded into the browser. Or, disable\n" + "the limit by setting the row_limit traitlet to None. Note that\n" + "disabling this limit may cause the browser to freeze or crash." + ) + raise RowLimitExceededError(msg) + + +class RowLimitExceededError(Exception): + """ + Exception raised when the number of dataset rows after filtering and aggregation exceeds + the current limit. + """ + def __init__(self, message: str): + super().__init__(message) + diff --git a/vegafusion-python/vegafusion/jupyter/js/index.js b/vegafusion-python/vegafusion/jupyter/js/index.js new file mode 100644 index 00000000..64b34e07 --- /dev/null +++ b/vegafusion-python/vegafusion/jupyter/js/index.js @@ -0,0 +1,202 @@ +import vegaEmbed from "https://esm.sh/vega-embed@6?deps=vega@5&deps=vega-lite@5.19.0"; +import lodashDebounce from "https://esm.sh/lodash-es@4.17.21/debounce"; + +// Note: For offline support, the import lines above are removed and the remaining script +// is bundled using vl-convert's javascript_bundle function. See the documentation of +// the javascript_bundle function for details on the available imports and their names. +// If an additional import is required in the future, it will need to be added to vl-convert +// in order to preserve offline support. +async function render({ model, el }) { + let finalize; + + function showError(error){ + el.innerHTML = ( + '
<div style="color:red;">' +
+            '<p>JavaScript Error: ' + error.message + '</p>' +
+            "<p>This usually means there's a typo in your chart specification. " +
+            "See the javascript console for the full traceback.</p>" +
+            '</div>
' + ); + } + + const reembed = async () => { + if (finalize != null) { + finalize(); + } + + model.set("local_tz", Intl.DateTimeFormat().resolvedOptions().timeZone); + + let spec = structuredClone(model.get("transformed_spec")); + if (spec == null) { + // Remove any existing chart and return + while (el.firstChild) { + el.removeChild(el.lastChild); + } + model.save_changes(); + return; + } + let embedOptions = structuredClone(model.get("embed_options")) ?? undefined; + + let api; + try { + api = await vegaEmbed(el, spec, embedOptions); + } catch (error) { + showError(error) + return; + } + + finalize = api.finalize; + + // Debounce config + const wait = model.get("debounce_wait") ?? 10; + const debounceOpts = {leading: false, trailing: true}; + if (model.get("max_wait") ?? true) { + debounceOpts["maxWait"] = wait; + } + model.save_changes(); + + // Add signal/data listeners + for (const watch of model.get("_js_watch_plan") ?? []) { + if (watch.namespace === "data") { + const dataHandler = (_, value) => { + const updates = [{ + namespace: "data", + name: watch.name, + scope: watch.scope, + value: cleanJson(value) + }] + model.send({type: "update_state", updates: updates}); + }; + addDataListener(api.view, watch.name, watch.scope, lodashDebounce(dataHandler, wait, debounceOpts)) + + } else if (watch.namespace === "signal") { + const signalHandler = (_, value) => { + const updates = [{ + namespace: "signal", + name: watch.name, + scope: watch.scope, + value: cleanJson(value) + }] + model.send({type: "update_state", updates: updates}); + }; + + addSignalListener(api.view, watch.name, watch.scope, lodashDebounce(signalHandler, wait, debounceOpts)) + } + } + + // // Add signal/data updaters + // model.on('change:_py_to_js_updates', async (updates) => { + // for (const update of updates.changed._py_to_js_updates ?? []) { + // if (update.namespace === "signal") { + // setSignalValue(api.view, update.name, update.scope, update.value); + // } else if (update.namespace === "data") { + // setDataValue(api.view, update.name, update.scope, update.value); + // } + // } + // await api.view.runAsync(); + // }); + + // Add signal/data updaters as messages + model.on("msg:custom", msg => { + if (msg.type === "update_view") { + const updates = msg.updates; + for (const update of updates) { + if (update.namespace === "signal") { + setSignalValue(api.view, update.name, update.scope, update.value); + } else if (update.namespace === "data") { + setDataValue(api.view, update.name, update.scope, update.value); + } + } + } + }); + } + + model.on('change:transformed_spec', reembed); + model.on('change:embed_options', reembed); + model.on('change:debounce_wait', reembed); + model.on('change:max_wait', reembed); + await reembed(); +} + +function cleanJson(data) { + return JSON.parse(JSON.stringify(data)) +} + +function getNestedRuntime(view, scope) { + var runtime = view._runtime; + for (const index of scope) { + runtime = runtime.subcontext[index]; + } + return runtime +} + +function lookupSignalOp(view, name, scope) { + let parent_runtime = getNestedRuntime(view, scope); + return parent_runtime.signals[name] ?? 
null; +} + +function dataRef(view, name, scope) { + let parent_runtime = getNestedRuntime(view, scope); + return parent_runtime.data[name]; +} + +export function setSignalValue(view, name, scope, value) { + let signal_op = lookupSignalOp(view, name, scope); + view.update(signal_op, value); +} + +export function setDataValue(view, name, scope, value) { + let dataset = dataRef(view, name, scope); + let changeset = view.changeset().remove(() => true).insert(value) + dataset.modified = true; + view.pulse(dataset.input, changeset); +} + +export function addSignalListener(view, name, scope, handler) { + let signal_op = lookupSignalOp(view, name, scope); + return addOperatorListener( + view, + name, + signal_op, + handler, + ); +} + +export function addDataListener(view, name, scope, handler) { + let dataset = dataRef(view, name, scope).values; + return addOperatorListener( + view, + name, + dataset, + handler, + ); +} + +// Private helpers from Vega for dealing with nested signals/data +function findOperatorHandler(op, handler) { + const h = (op._targets || []) + .filter(op => op._update && op._update.handler === handler); + return h.length ? h[0] : null; +} + +function addOperatorListener(view, name, op, handler) { + let h = findOperatorHandler(op, handler); + if (!h) { + h = trap(view, () => handler(name, op.value)); + h.handler = handler; + view.on(op, null, h); + } + return view; +} + +function trap(view, fn) { + return !fn ? null : function() { + try { + fn.apply(this, arguments); + } catch (error) { + view.error(error); + } + }; +} + +export default { render } From 289cff3830e8f451e958bff027b8b0cc4abfcf67 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Tue, 8 Oct 2024 09:41:23 -0400 Subject: [PATCH 2/5] remove comment --- vegafusion-python/vegafusion/jupyter/js/index.js | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/vegafusion-python/vegafusion/jupyter/js/index.js b/vegafusion-python/vegafusion/jupyter/js/index.js index 64b34e07..0f54905d 100644 --- a/vegafusion-python/vegafusion/jupyter/js/index.js +++ b/vegafusion-python/vegafusion/jupyter/js/index.js @@ -84,18 +84,6 @@ async function render({ model, el }) { } } - // // Add signal/data updaters - // model.on('change:_py_to_js_updates', async (updates) => { - // for (const update of updates.changed._py_to_js_updates ?? 
[]) { - // if (update.namespace === "signal") { - // setSignalValue(api.view, update.name, update.scope, update.value); - // } else if (update.namespace === "data") { - // setDataValue(api.view, update.name, update.scope, update.value); - // } - // } - // await api.view.runAsync(); - // }); - // Add signal/data updaters as messages model.on("msg:custom", msg => { if (msg.type === "update_view") { From 822a5a098c3c932beb971f6a57086eb63e83982e Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Tue, 8 Oct 2024 09:41:54 -0400 Subject: [PATCH 3/5] add vegafusion_widget test --- vegafusion-python/tests/test_altair_mocks.py | 129 +++++++++++++++---- 1 file changed, 106 insertions(+), 23 deletions(-) diff --git a/vegafusion-python/tests/test_altair_mocks.py b/vegafusion-python/tests/test_altair_mocks.py index f9dff71f..27909a0d 100644 --- a/vegafusion-python/tests/test_altair_mocks.py +++ b/vegafusion-python/tests/test_altair_mocks.py @@ -254,6 +254,105 @@ def test_altair_mock(mock_name, img_tolerance, delay): altair_default_notebook = jupytext.read(io.StringIO(altair_default_markdown), fmt="markdown") vegafusion_jupyter_notebook = jupytext.read(io.StringIO(vegafusion_jupyter_markdown), fmt="markdown") + voila_proc, chrome_driver = launch_voila() + + try: + name = mock_name.replace("/", "-") + altair_imgs = export_image_sequence( + chrome_driver, altair_default_notebook, name + "_altair", actions, delay + ) + vegafusion_mime_imgs = export_image_sequence( + chrome_driver, vegafusion_jupyter_notebook, name + "_vegafusion_mime", actions, delay + ) + + compare_images(altair_imgs, vegafusion_mime_imgs, img_tolerance * 0.99) + + finally: + voila_proc.kill() + chrome_driver.close() + time.sleep(0.25) + + +def test_vegafusion_widget(): + + altair_chart_str = """ +from vegafusion.jupyter import VegaFusionWidget + +import altair as alt +from vega_datasets import data + +source = data.seattle_weather.url +brush = alt.selection_interval(encodings=['x']) + +bars = alt.Chart().mark_bar().encode( + x='month(date):O', + y='mean(precipitation):Q', + opacity=alt.condition(brush, alt.OpacityValue(1), alt.OpacityValue(0.7)), +).add_params( + brush +) + +line = alt.Chart().mark_rule(color='firebrick').encode( + y='mean(precipitation):Q', + size=alt.SizeValue(3) +).transform_filter( + brush +) + +chart = alt.layer(bars, line, data=source) +""" + + # Convert to Vega spec and use VegaFusionWidget + notebook_text_vf = r""" +```python +{altair_chart_str} + +vega_spec_inline = chart.to_dict(format="vega") +vega_spec_inline["data"][1]["url"] = "vegafusion+dataset://weather" + +widget = VegaFusionWidget( + spec=vega_spec_inline, + inline_datasets={"weather": data.seattle_weather()} +) +widget +``` +""".replace("{altair_chart_str}", altair_chart_str) + + notebook_vf = jupytext.read(io.StringIO(notebook_text_vf), fmt="markdown") + + # Display with default altair renderer + notebook_text_alt = f""" +```python +{altair_chart_str} + +chart +``` +""" + notebook_alt = jupytext.read(io.StringIO(notebook_text_alt), fmt="markdown") + + # Define actions to perform a selection + actions = [ + {"type": "snapshot"}, + {"type": "move_to", "coords": [150, 150]}, + {"type": "click_and_hold"}, + {"type": "move_to", "coords": [200, 200]}, + {"type": "release"}, + {"type": "snapshot"} + ] + + voila_proc, chrome_driver = launch_voila() + + try: + imgs_alt = export_image_sequence(chrome_driver, notebook_alt, "vegafusion_widget_alt", actions=actions, delay=0.25) + imgs_vf = export_image_sequence(chrome_driver, notebook_vf, "vegafusion_widget_vf", 
actions=actions, delay=0.25) + + compare_images(imgs_alt, imgs_vf, 0.99) + finally: + voila_proc.kill() + chrome_driver.close() + time.sleep(0.25) + +def launch_voila(): # Create selenium Chrome instance chrome_opts = webdriver.ChromeOptions() @@ -274,33 +373,17 @@ def test_altair_mock(mock_name, img_tolerance, delay): # Sleep to allow Voila itself to start (this does not include loading a particular dashboard). time.sleep(1.0) - try: - name = mock_name.replace("/", "-") - altair_imgs = export_image_sequence( - chrome_driver, altair_default_notebook, name + "_altair", actions, delay - ) - vegafusion_mime_imgs = export_image_sequence( - chrome_driver, vegafusion_jupyter_notebook, name + "_vegafusion_mime", actions, delay - ) - - for i in range(len(altair_imgs)): - altair_img = altair_imgs[i] - vegafusion_mime_img = vegafusion_mime_imgs[i] + return voila_proc, chrome_driver - assert altair_img.shape == vegafusion_mime_img.shape, "Size mismatch with mime renderer" +def compare_images(baseline_imgs, test_imgs, img_tolerance): - similarity_mime_value = ssim(altair_img, vegafusion_mime_img, channel_axis=2) - print(f"({i}) similarity_mime_value={similarity_mime_value}") + for i, (baseline_img, test_img) in enumerate(zip(baseline_imgs, test_imgs)): + assert baseline_img.shape == test_img.shape, "Size mismatch" - # Allow slightly more image tolerance for mime renderer as floating point differences may - # be introduced by pre-transform process - mime_image_tolerance = img_tolerance * 0.99 - assert similarity_mime_value >= mime_image_tolerance, f"Similarity failed with mime renderer on image {i}" + similarity_mime_value = ssim(baseline_img, test_img, channel_axis=2) + print(f"({i}) similarity_mime_value={similarity_mime_value}") - finally: - voila_proc.kill() - chrome_driver.close() - time.sleep(0.25) + assert similarity_mime_value >= img_tolerance, f"Similarity failed with mime renderer on image {i}" def load_actions(mock_name): From 36222a7b003e8b8968901faa6359f0ef4fa0d34c Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Tue, 8 Oct 2024 09:42:23 -0400 Subject: [PATCH 4/5] Rename test file --- .../tests/{test_altair_mocks.py => test_jupyter_widget.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename vegafusion-python/tests/{test_altair_mocks.py => test_jupyter_widget.py} (100%) diff --git a/vegafusion-python/tests/test_altair_mocks.py b/vegafusion-python/tests/test_jupyter_widget.py similarity index 100% rename from vegafusion-python/tests/test_altair_mocks.py rename to vegafusion-python/tests/test_jupyter_widget.py From 592705564b64c05474f7ce0426e27c628f150dd6 Mon Sep 17 00:00:00 2001 From: Jon Mease Date: Tue, 8 Oct 2024 10:06:17 -0400 Subject: [PATCH 5/5] fix ignore path for rename --- .github/workflows/build_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index 68154b28..02db3903 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -314,7 +314,7 @@ jobs: - name: Test vegafusion working-directory: vegafusion-python/ - run: pytest --ignore=tests/test_altair_mocks.py + run: pytest --ignore=tests/test_jupyter_widget.py test-vegafusion-python-win-64: runs-on: windows-2022 @@ -351,7 +351,7 @@ jobs: python -m pip install pytest altair vega-datasets scikit-image - name: Test vegafusion working-directory: vegafusion-python/ - run: pytest --ignore=tests/test_altair_mocks.py + run: pytest --ignore=tests/test_jupyter_widget.py build-vegafusion-server-linux-64: