diff --git a/WHAT-WE-LEARNED.md b/WHAT-WE-LEARNED.md
index 72ab7ba..8d92f73 100644
--- a/WHAT-WE-LEARNED.md
+++ b/WHAT-WE-LEARNED.md
@@ -2,15 +2,21 @@
 Even if it seems obvious in retrospect, what have we learned about Python Shiny in this project?
 
-## No warning if ID mismatch
+## No warning if ID mismatch / type mismatch
 
 Unless I'm missing something, there doesn't seem to be any warning when there isn't a matching function name in the server for an ID in the UI. Whether from typos or from fumbling some more complicated display logic, there have been times when a warning here would have been helpful.
 
+Related: I had
+```
+ui.output_text("epsilon")
+```
+but then changed `epsilon` from `render.text` to `reactive.value` and forgot to update the UI. No warning in the logs: just a spinner in the browser window.
+
 ## UI and Server functions don't really separate concerns
 
 My first impression was that the UI function would be something like a "view" and the server would be a "controller", but for any kind of conditional display I need a `render.ui`, so that distinction breaks down quickly. Just maintaining a naming convention for these little bits of UI in the server gets to be a chore. It would be kludgy, but what if we could supply lambdas instead of names?
 
-## Refactoring: values vs. reactive values
+## Values vs. reactive values
 
 A couple times I've started with something as a plain value, and then realized I needed a reactive value. This gets confusing if there are merge conflicts, or if some variables are reactive, and some aren't.
@@ -44,6 +50,8 @@ Renderer.__call__() missing 1 required positional argument: '_fn'
 
 It feels like a gap in the library that there is no component testing. The only advice is to pull out testable logic from the server functions, and for the rest, use end-to-end tests; there's not a recommended way to test the UI + server interaction for just one component.
 
+Short of full component testing, being able to write unit tests around reactive values would be nice.
+
 ## Unstated requirements for module IDs
 
 The [docs](https://shiny.posit.co/py/docs/modules.html#how-to-use-modules) only say:
@@ -77,6 +85,10 @@ I've had to tweak the CSS a few times:
 
 The different flavors of "Shiny" are a bit of a nuisance when trying to find examples. The maturity of Shiny for R means that the vast majority of the examples are for R, even with Python in the search. It would be nice if the docs site remembered that I only want to look at docs for Core.
 
+## More validation / type casting on inputs
+
+If we imagine we have a field that is a required positive integer, it would be nice to be able to specify that in the input itself, with a default error message handled by the UI, instead of needing to set up a calc on our side.
+
 ## It's easy to forget `return`
 
 This is simple, but I was still scratching my head for a while. While there are some cases where returning `None` is intended, is it more likely to be an error? What if it raised a warning, and an explicit empty string could be returned if that's really what you want?
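To illustrate the "Values vs. reactive values" section above: promoting a plain value to a `reactive.value` changes every read site from `x` to `x()` and every write site to `x.set(...)`, and reads are only legal inside a reactive context (or a `reactive.isolate()` block). That is why a half-finished refactor, or a merge that leaves some variables reactive and some not, gets confusing. A minimal sketch, not code from this diff:

```python
from shiny import reactive

# Plain value: read and written like any other Python variable.
contributions = 1
doubled = contributions * 2

# Reactive value: reads become calls, writes go through .set(), and reads are
# only valid inside a calc/effect/render (or a reactive.isolate() block).
contributions = reactive.value(1)

# Inside a @reactive.calc or @render.* function:
#     doubled = contributions() * 2
# Inside a @reactive.effect:
#     contributions.set(5)
```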
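For the "More validation / type casting on inputs" wish: today the guard has to live in a calc (or effect) on our side, and `req()` silently cancels downstream outputs rather than explaining anything to the user. A rough sketch of the status quo with a hypothetical `bins` field (not code from this diff); as far as I can tell, `input_numeric`'s `min`/`max` arguments only set HTML attributes and don't produce a validation message on their own:

```python
from shiny import reactive, req, ui

# UI side: the browser gets a min hint, but there is no built-in
# "this field must be a positive integer" error message.
bins_input = ui.input_numeric("bins", "Bins", 10, min=1)


def server(input, output, session):
    # Server side: the hand-rolled guard we wish we could declare on the input.
    @reactive.calc
    def validated_bins():
        value = input.bins()
        # req() cancels downstream calcs/renders when the value is missing or
        # non-positive; any user-facing message is still up to us.
        req(value is not None and value > 0)
        return int(value)
```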
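And for "It's easy to forget `return`": the failure is silent because the render function runs, the expression is evaluated and thrown away, and the output just stays blank. A minimal sketch shaped like this diff's `epsilon_text` renderer (not its actual code):

```python
from shiny import reactive, render

epsilon = reactive.value(1.0)


def server(input, output, session):
    @render.text
    def epsilon_text():
        f"Epsilon: {epsilon():0.3}"  # Evaluated, then discarded: no return, no warning.

    # What was intended:
    #
    #     @render.text
    #     def epsilon_text():
    #         return f"Epsilon: {epsilon():0.3}"
```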
diff --git a/dp_creator_ii/app/__init__.py b/dp_creator_ii/app/__init__.py
index b91456d..8031cf4 100644
--- a/dp_creator_ii/app/__init__.py
+++ b/dp_creator_ii/app/__init__.py
@@ -30,23 +30,45 @@ def server(input, output, session):  # pragma: no cover
     csv_path = reactive.value(cli_info.csv_path)
     contributions = reactive.value(cli_info.contributions)
 
+    lower_bounds = reactive.value({})
+    upper_bounds = reactive.value({})
+    bin_counts = reactive.value({})
+    weights = reactive.value({})
+    epsilon = reactive.value(1)
+
     dataset_panel.dataset_server(
         input,
         output,
         session,
+        is_demo=cli_info.is_demo,
         csv_path=csv_path,
         contributions=contributions,
-        is_demo=cli_info.is_demo,
     )
     analysis_panel.analysis_server(
         input,
         output,
         session,
+        is_demo=cli_info.is_demo,
         csv_path=csv_path,
         contributions=contributions,
-        is_demo=cli_info.is_demo,
+        lower_bounds=lower_bounds,
+        upper_bounds=upper_bounds,
+        bin_counts=bin_counts,
+        weights=weights,
+        epsilon=epsilon,
+    )
+    results_panel.results_server(
+        input,
+        output,
+        session,
+        csv_path=csv_path,
+        contributions=contributions,
+        lower_bounds=lower_bounds,
+        upper_bounds=upper_bounds,
+        bin_counts=bin_counts,
+        weights=weights,
+        epsilon=epsilon,
     )
-    results_panel.results_server(input, output, session)
     session.on_ended(ctrl_c_reminder)
 
     return server
diff --git a/dp_creator_ii/app/analysis_panel.py b/dp_creator_ii/app/analysis_panel.py
index 8f50eca..48ea19c 100644
--- a/dp_creator_ii/app/analysis_panel.py
+++ b/dp_creator_ii/app/analysis_panel.py
@@ -28,41 +28,39 @@ def analysis_ui():
         ),
         ui.output_ui("epsilon_tooltip_ui"),
         log_slider("log_epsilon_slider", 0.1, 10.0),
-        ui.output_text("epsilon"),
+        ui.output_text("epsilon_text"),
         output_code_sample("Privacy Loss", "privacy_loss_python"),
         ui.output_ui("download_results_button_ui"),
         value="analysis_panel",
     )
 
 
+def _cleanup_reactive_dict(reactive_dict, keys_to_keep):  # pragma: no cover
+    reactive_dict_copy = {**reactive_dict()}
+    keys_to_del = set(reactive_dict_copy.keys()) - set(keys_to_keep)
+    for key in keys_to_del:
+        del reactive_dict_copy[key]
+    reactive_dict.set(reactive_dict_copy)
+
+
 def analysis_server(
     input,
     output,
     session,
-    csv_path=None,
-    contributions=None,
-    is_demo=None,
+    csv_path,
+    contributions,
+    is_demo,
+    lower_bounds,
+    upper_bounds,
+    bin_counts,
+    weights,
+    epsilon,
 ):  # pragma: no cover
     @reactive.calc
     def button_enabled():
         column_ids_selected = input.columns_checkbox_group()
         return len(column_ids_selected) > 0
 
-    weights = reactive.value({})
-
-    def set_column_weight(column_id, weight):
-        weights.set({**weights(), column_id: weight})
-
-    def clear_column_weights(columns_ids_to_keep):
-        weights_copy = {**weights()}
-        column_ids_to_del = set(weights_copy.keys()) - set(columns_ids_to_keep)
-        for column_id in column_ids_to_del:
-            del weights_copy[column_id]
-        weights.set(weights_copy)
-
-    def get_weights_sum():
-        return sum(weights().values())
-
     @reactive.effect
     def _update_checkbox_group():
         ui.update_checkbox_group(
@@ -75,7 +73,10 @@ def _update_checkbox_group():
     @reactive.effect
     @reactive.event(input.columns_checkbox_group)
     def _on_column_set_change():
         column_ids_selected = input.columns_checkbox_group()
-        clear_column_weights(column_ids_selected)
+        # We only clean up the weights, and everything else is left in place,
+        # so if you restore a column, you see the original values.
+        # (Except for weight, which goes back to the default.)
+        _cleanup_reactive_dict(weights, column_ids_selected)
 
     @render.ui
     def columns_checkbox_group_tooltip_ui():
@@ -98,9 +99,11 @@ def columns_ui():
                 column_id,
                 name=column_ids_to_names[column_id],
                 contributions=contributions(),
-                epsilon=epsilon_calc(),
-                set_column_weight=set_column_weight,
-                get_weights_sum=get_weights_sum,
+                epsilon=epsilon(),
+                lower_bounds=lower_bounds,
+                upper_bounds=upper_bounds,
+                bin_counts=bin_counts,
+                weights=weights,
                 is_demo=is_demo,
             )
         return [
@@ -130,17 +133,18 @@ def epsilon_tooltip_ui():
             """,
         )
 
-    @reactive.calc
-    def epsilon_calc():
-        return pow(10, input.log_epsilon_slider())
+    @reactive.effect
+    @reactive.event(input.log_epsilon_slider)
+    def _set_epsilon():
+        epsilon.set(pow(10, input.log_epsilon_slider()))
 
     @render.text
-    def epsilon():
-        return f"Epsilon: {epsilon_calc():0.3}"
+    def epsilon_text():
+        return f"Epsilon: {epsilon():0.3}"
 
     @render.code
     def privacy_loss_python():
-        return make_privacy_loss_block(epsilon_calc())
+        return make_privacy_loss_block(epsilon())
 
     @reactive.effect
     @reactive.event(input.go_to_results)
diff --git a/dp_creator_ii/app/components/column_module.py b/dp_creator_ii/app/components/column_module.py
index c27e51d..719a1ff 100644
--- a/dp_creator_ii/app/components/column_module.py
+++ b/dp_creator_ii/app/components/column_module.py
@@ -8,26 +8,31 @@
 from dp_creator_ii.app.components.outputs import output_code_sample, demo_tooltip
 
 
+default_weight = 2
+
+
 @module.ui
 def column_ui():  # pragma: no cover
     width = "10em"  # Just wide enough so the text isn't truncated.
     return ui.layout_columns(
         [
+            # The default values on these inputs
+            # should be overridden by the reactive.effect.
             ui.output_ui("bounds_tooltip_ui"),
-            ui.input_numeric("min", "Min", 0, width=width),
-            ui.input_numeric("max", "Max", 10, width=width),
+            ui.input_numeric("lower", "Lower", 0, width=width),
+            ui.input_numeric("upper", "Upper", 0, width=width),
             ui.output_ui("bins_tooltip_ui"),
-            ui.input_numeric("bins", "Bins", 10, width=width),
+            ui.input_numeric("bins", "Bins", 0, width=width),
             ui.output_ui("weight_tooltip_ui"),
             ui.input_select(
                 "weight",
                 "Weight",
                 choices={
                     1: "Less accurate",
-                    2: "Default",
+                    default_weight: "Default",
                     4: "More accurate",
                 },
-                selected=2,
+                selected=default_weight,
                 width=width,
             ),
         ],
@@ -36,7 +41,7 @@ def column_ui():  # pragma: no cover
             # https://github.com/opendp/dp-creator-ii/issues/138
             ui.markdown(
                 "This simulation assumes a normal distribution "
-                "between the specified min and max. "
+                "between the specified lower and upper bounds. "
                 "Your data file has not been read except to determine the columns."
             ),
             ui.output_plot("column_plot", height="300px"),
@@ -61,23 +66,39 @@ def column_server(
     name,
     contributions,
     epsilon,
-    set_column_weight,
-    get_weights_sum,
+    lower_bounds,
+    upper_bounds,
+    bin_counts,
+    weights,
     is_demo,
 ):  # pragma: no cover
+    @reactive.effect
+    def _set_all_inputs():
+        with reactive.isolate():  # Without isolate, there is an infinite loop.
+            ui.update_numeric("lower", value=lower_bounds().get(name, 0))
+            ui.update_numeric("upper", value=upper_bounds().get(name, 10))
+            ui.update_numeric("bins", value=bin_counts().get(name, 10))
+            ui.update_numeric("weight", value=weights().get(name, default_weight))
+
+    @reactive.effect
+    @reactive.event(input.lower)
+    def _set_lower():
+        lower_bounds.set({**lower_bounds(), name: float(input.lower())})
+
+    @reactive.effect
+    @reactive.event(input.upper)
+    def _set_upper():
+        upper_bounds.set({**upper_bounds(), name: float(input.upper())})
+
+    @reactive.effect
+    @reactive.event(input.bins)
+    def _set_bins():
+        bin_counts.set({**bin_counts(), name: float(input.bins())})
+
     @reactive.effect
     @reactive.event(input.weight)
-    def _():
-        set_column_weight(name, float(input.weight()))
-
-    @reactive.calc
-    def column_config():
-        return {
-            "min": input.min(),
-            "max": input.max(),
-            "bins": input.bins(),
-            "weight": float(input.weight()),
-        }
+    def _set_weight():
+        weights.set({**weights(), name: float(input.weight())})
 
     @render.ui
     def bounds_tooltip_ui():
@@ -119,30 +140,28 @@ def weight_tooltip_ui():
 
     @render.code
     def column_code():
-        config = column_config()
         return make_column_config_block(
             name=name,
-            min_value=config["min"],
-            max_value=config["max"],
-            bin_count=config["bins"],
+            lower_bound=float(input.lower()),
+            upper_bound=float(input.upper()),
+            bin_count=int(input.bins()),
         )
 
     @render.plot()
     def column_plot():
-        config = column_config()
-        min_x = config["min"]
-        max_x = config["max"]
-        bin_count = config["bins"]
-        weight = config["weight"]
-        weights_sum = get_weights_sum()
+        lower_x = float(input.lower())
+        upper_x = float(input.upper())
+        bin_count = int(input.bins())
+        weight = float(input.weight())
+        weights_sum = sum(weights().values())
         info(f"Weight ratio for {name}: {weight}/{weights_sum}")
         if weights_sum == 0:
             # This function is triggered when column is removed;
             # Exit early to avoid divide-by-zero.
             return None
         _confidence, accuracy, histogram = make_confidence_accuracy_histogram(
-            lower=min_x,
-            upper=max_x,
+            lower=lower_x,
+            upper=upper_x,
             bin_count=bin_count,
             contributions=contributions,
             weighted_epsilon=epsilon * weight / weights_sum,
diff --git a/dp_creator_ii/app/results_panel.py b/dp_creator_ii/app/results_panel.py
index 64d6621..c760717 100644
--- a/dp_creator_ii/app/results_panel.py
+++ b/dp_creator_ii/app/results_panel.py
@@ -1,3 +1,5 @@
+from json import dumps
+
 from shiny import ui, render
 
 from dp_creator_ii.utils.templates import make_notebook_py, make_script_py
@@ -7,6 +9,8 @@ def results_ui():
     return ui.nav_panel(
         "Download results",
+        ui.p("TODO: Use this information to fill in a template!"),
+        ui.output_code("data_dump"),
         ui.markdown(
             "You can now make a differentially private release of your data. "
             "This will lock the configuration you’ve provided on the previous pages."
@@ -29,7 +33,34 @@ def results_ui():
     )
 
 
-def results_server(input, output, session):  # pragma: no cover
+def results_server(
+    input,
+    output,
+    session,
+    csv_path,
+    contributions,
+    lower_bounds,
+    upper_bounds,
+    bin_counts,
+    weights,
+    epsilon,
+):  # pragma: no cover
+    @render.code
+    def data_dump():
+        # TODO: Use this information in a template!
+        return dumps(
+            {
+                "csv_path": csv_path(),
+                "contributions": contributions(),
+                "lower_bounds": lower_bounds(),
+                "upper_bounds": upper_bounds(),
+                "bin_counts": bin_counts(),
+                "weights": weights(),
+                "epsilon": epsilon(),
+            },
+            indent=2,
+        )
+
     @render.download(
         filename="dp-creator-script.py",
         media_type="text/x-python",
diff --git a/dp_creator_ii/utils/dp_helper.py b/dp_creator_ii/utils/dp_helper.py
index e05088a..c32a3dd 100644
--- a/dp_creator_ii/utils/dp_helper.py
+++ b/dp_creator_ii/utils/dp_helper.py
@@ -49,7 +49,7 @@ def make_confidence_accuracy_histogram(
     │ (8, 10] ┆ ... │
     └─────────┴─────┘
     """
-    # Mock data only depends on min and max, so it could be cached,
+    # Mock data only depends on lower and upper bounds, so it could be cached,
     # but I'd guess this is dominated by the DP operations,
     # so not worth optimizing.
     row_count = 100
diff --git a/dp_creator_ii/utils/mock_data.py b/dp_creator_ii/utils/mock_data.py
index 1cde06d..bfdd52c 100644
--- a/dp_creator_ii/utils/mock_data.py
+++ b/dp_creator_ii/utils/mock_data.py
@@ -2,7 +2,7 @@
 import polars as pl
 from scipy.stats import norm  # type: ignore
 
-ColumnDef = namedtuple("ColumnDef", ["min", "max"])
+ColumnDef = namedtuple("ColumnDef", ["lower", "upper"])
 
 
 def mock_data(column_defs, row_count=1000):
@@ -34,12 +34,12 @@ def mock_data(column_defs, row_count=1000):
     quantile_width = 95 / 100
     for column_name, column_def in column_defs.items():
-        min_ppf = norm.ppf((1 - quantile_width) / 2)
-        max_ppf = norm.ppf(1 - (1 - quantile_width) / 2)
-        min_value = column_def.min
-        max_value = column_def.max
-        slope = (max_value - min_value) / (max_ppf - min_ppf)
-        intercept = min_value - slope * min_ppf
+        lower_ppf = norm.ppf((1 - quantile_width) / 2)
+        upper_ppf = norm.ppf(1 - (1 - quantile_width) / 2)
+        lower_bound = column_def.lower
+        upper_bound = column_def.upper
+        slope = (upper_bound - lower_bound) / (upper_ppf - lower_ppf)
+        intercept = lower_bound - slope * lower_ppf
         # Start from 1 instead of 0:
         # The polars bin intervals are closed at the top,
         # so if we include the zero, there is one value in the
diff --git a/dp_creator_ii/utils/templates/__init__.py b/dp_creator_ii/utils/templates/__init__.py
index 838979b..8251890 100644
--- a/dp_creator_ii/utils/templates/__init__.py
+++ b/dp_creator_ii/utils/templates/__init__.py
@@ -143,12 +143,12 @@ def make_privacy_loss_block(epsilon):
     return str(_Template("privacy_loss").fill_values(EPSILON=epsilon))
 
 
-def make_column_config_block(name, min_value, max_value, bin_count):
+def make_column_config_block(name, lower_bound, upper_bound, bin_count):
     """
     >>> print(make_column_config_block(
     ...     name="HW GRADE",
-    ...     min_value=0,
-    ...     max_value=100,
+    ...     lower_bound=0,
+    ...     upper_bound=100,
     ...     bin_count=10
     ... ))
     # From the public information, determine the bins:
@@ -171,8 +171,8 @@ def make_column_config_block(name, min_value, max_value, bin_count):
             POLARS_CONFIG_NAME=f"{snake_name}_config",
         )
         .fill_values(
-            MIN=min_value,
-            MAX=max_value,
+            LOWER_BOUND=lower_bound,
+            UPPER_BOUND=upper_bound,
             BINS=bin_count,
             COLUMN_NAME=name,
             BIN_COLUMN_NAME=f"{snake_name}_bin",
diff --git a/dp_creator_ii/utils/templates/no-tests/_column_config.py b/dp_creator_ii/utils/templates/no-tests/_column_config.py
index 88b3132..ddb44bd 100644
--- a/dp_creator_ii/utils/templates/no-tests/_column_config.py
+++ b/dp_creator_ii/utils/templates/no-tests/_column_config.py
@@ -1,5 +1,5 @@
 # From the public information, determine the bins:
-CUT_LIST_NAME = make_cut_points(MIN, MAX, BINS)
+CUT_LIST_NAME = make_cut_points(LOWER_BOUND, UPPER_BOUND, BINS)
 
 # Use these bins to define a Polars column:
 POLARS_CONFIG_NAME = (
diff --git a/tests/test_app.py b/tests/test_app.py
index 638fb90..ae20b98 100644
--- a/tests/test_app.py
+++ b/tests/test_app.py
@@ -9,6 +9,7 @@
 default_app = create_app_fixture(Path(__file__).parent / "fixtures/default_app.py")
 tooltip = "#choose_csv_demo_tooltip_ui svg"
 for_the_demo = "For the demo, we'll imagine"
+simulation = "This simulation assumes a normal distribution"
 
 
 # TODO: Why is incomplete coverage reported here?
@@ -86,16 +87,23 @@ def expect_no_error():
 
     # Set column details:
     page.get_by_label("grade").check()
-    page.get_by_label("Min").click()
-    page.get_by_label("Min").fill("0")
-    # TODO: All these recalculations cause timeouts:
+    expect_visible(simulation)
+    # Check that the default is set correctly:
+    assert page.get_by_label("Upper").input_value() == "10"
+    # Reset, and confirm:
+    new_value = "20"
+    page.get_by_label("Upper").fill(new_value)
+    # Uncheck the column:
+    page.get_by_label("grade").uncheck()
+    expect_not_visible(simulation)
+    # Recheck the column:
+    page.get_by_label("grade").check()
+    expect_visible(simulation)
+    assert page.get_by_label("Upper").input_value() == new_value
+    # TODO: Setting more inputs without checking for updates
+    # causes recalculations to pile up, and these cause timeouts on CI:
     # It is still rerendering the graph after hitting "Download results".
     # https://github.com/opendp/dp-creator-ii/issues/116
-    # page.get_by_label("Max").click()
-    # page.get_by_label("Max").fill("100")
-    # page.get_by_label("Bins").click()
-    # page.get_by_label("Bins").fill("20")
-    page.get_by_label("Weight").select_option("1")
     expect_no_error()
 
     # -- Download results --