From 3526c1aa7aecb630050d705d10283c87a4e5ff2d Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Thu, 17 Oct 2024 16:04:22 -0400 Subject: [PATCH] Remove output cells --- demo.ipynb | 352 ++--------------------------------------------------- 1 file changed, 9 insertions(+), 343 deletions(-) diff --git a/demo.ipynb b/demo.ipynb index c06d427..1c90ffd 100644 --- a/demo.ipynb +++ b/demo.ipynb @@ -9,6 +9,15 @@ "First, generate a fake dataset. In the future, let's check it in and use it if the [`--demo` flag](https://github.com/opendp/dp-creator-ii/issues/7) is given." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Make mock \n", + "\n", + "When [Add `--demo` CLI option](https://github.com/opendp/dp-creator-ii/pull/61) is merged, reference that code and delete these cells.\n" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -742,349 +751,6 @@ "source": [ "At this point, the privacy budget of the context, configured at the start with `epsilon` and `weights`, is exhausted: attempting to make another release will result in an error." ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "\n", - "# Other exports\n", - "\n", - "Below is a proposal for what the other export formats (text and CSV) would look like. We first make a data structure with everything we need, and then use generic methods to serialize that structure." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'inputs': {'csv_path': '/tmp/demo.csv',\n", - " 'contributions': 10,\n", - " 'epsilon': 2,\n", - " 'weights': [4, 4, 1, 1],\n", - " 'max_possible_rows': 1000000,\n", - " 'delta': 1e-07,\n", - " 'grade': {'min': 50, 'max': 100, 'bins_count': 10},\n", - " 'class_year': {'min': 1, 'max': 4, 'bins_count': 4}},\n", - " 'outputs': {'grade': {'mean': 84.25140291806959,\n", - " 'histogram': {'(55, 60]': 24,\n", - " '(60, 65]': 0,\n", - " '(65, 70]': 28,\n", - " '(70, 75]': 181,\n", - " '(75, 80]': 227,\n", - " '(80, 85]': 248,\n", - " '(85, 90]': 204,\n", - " '(90, 95]': 110,\n", - " '(95, inf]': 0}},\n", - " 'class_year': {'mean': 1.8125701459034793,\n", - " 'histogram': {'(-inf, 1]': 420,\n", - " '(1, 2]': 311,\n", - " '(2, 3]': 80,\n", - " '(3, inf]': 47}}}}" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "release = {\n", - " 'inputs': {\n", - " 'csv_path': csv_path,\n", - " 'contributions': contributions,\n", - " 'epsilon': epsilon,\n", - " 'weights': weights,\n", - " 'max_possible_rows': max_possible_rows,\n", - " 'delta': delta,\n", - " 'grade': {\n", - " 'min': grade_min,\n", - " 'max': grade_max,\n", - " 'bins_count': grade_bins_count,\n", - " },\n", - " 'class_year': {\n", - " 'min': class_year_min,\n", - " 'max': class_year_max,\n", - " 'bins_count': class_year_bins_count,\n", - " } \n", - " },\n", - " 'outputs': {\n", - " 'grade': {\n", - " 'mean': grade_mean.item(),\n", - " 'histogram': {v['grade_bin']: v['len'] for v in grade_histogram.to_dicts()}\n", - " },\n", - " 'class_year': {\n", - " 'mean': class_year_mean.item(),\n", - " 'histogram': {v['class_year_bin']: v['len'] for v in class_year_histogram.to_dicts()}\n", - " },\n", - " }\n", - "}\n", - "release" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Text export?\n", - "\n", - "Just use YAML, unless there are other requirements?" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "inputs:\n", - " class_year:\n", - " bins_count: 4\n", - " max: 4\n", - " min: 1\n", - " contributions: 10\n", - " csv_path: /tmp/demo.csv\n", - " delta: 1.0e-07\n", - " epsilon: 2\n", - " grade:\n", - " bins_count: 10\n", - " max: 100\n", - " min: 50\n", - " max_possible_rows: 1000000\n", - " weights:\n", - " - 4\n", - " - 4\n", - " - 1\n", - " - 1\n", - "outputs:\n", - " class_year:\n", - " histogram:\n", - " (-inf, 1]: 420\n", - " (1, 2]: 311\n", - " (2, 3]: 80\n", - " (3, inf]: 47\n", - " mean: 1.8125701459034793\n", - " grade:\n", - " histogram:\n", - " (55, 60]: 24\n", - " (60, 65]: 0\n", - " (65, 70]: 28\n", - " (70, 75]: 181\n", - " (75, 80]: 227\n", - " (80, 85]: 248\n", - " (85, 90]: 204\n", - " (90, 95]: 110\n", - " (95, inf]: 0\n", - " mean: 84.25140291806959\n", - "\n" - ] - } - ], - "source": [ - "import yaml\n", - "\n", - "print(yaml.dump(release))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### CSV export?\n", - "\n", - "Flatten the data stucture to key value pairs and make a two-column CSV unless there are other requirements?" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0
inputs.csv_path/tmp/demo.csv
inputs.contributions10
inputs.epsilon2
inputs.weights[4, 4, 1, 1]
inputs.max_possible_rows1000000
inputs.delta0.0
inputs.grade.min50
inputs.grade.max100
inputs.grade.bins_count10
inputs.class_year.min1
inputs.class_year.max4
inputs.class_year.bins_count4
outputs.grade.mean84.251403
outputs.grade.histogram.(55, 60]24
outputs.grade.histogram.(60, 65]0
outputs.grade.histogram.(65, 70]28
outputs.grade.histogram.(70, 75]181
outputs.grade.histogram.(75, 80]227
outputs.grade.histogram.(80, 85]248
outputs.grade.histogram.(85, 90]204
outputs.grade.histogram.(90, 95]110
outputs.grade.histogram.(95, inf]0
outputs.class_year.mean1.81257
outputs.class_year.histogram.(-inf, 1]420
outputs.class_year.histogram.(1, 2]311
outputs.class_year.histogram.(2, 3]80
outputs.class_year.histogram.(3, inf]47
\n", - "
" - ], - "text/plain": [ - " 0\n", - "inputs.csv_path /tmp/demo.csv\n", - "inputs.contributions 10\n", - "inputs.epsilon 2\n", - "inputs.weights [4, 4, 1, 1]\n", - "inputs.max_possible_rows 1000000\n", - "inputs.delta 0.0\n", - "inputs.grade.min 50\n", - "inputs.grade.max 100\n", - "inputs.grade.bins_count 10\n", - "inputs.class_year.min 1\n", - "inputs.class_year.max 4\n", - "inputs.class_year.bins_count 4\n", - "outputs.grade.mean 84.251403\n", - "outputs.grade.histogram.(55, 60] 24\n", - "outputs.grade.histogram.(60, 65] 0\n", - "outputs.grade.histogram.(65, 70] 28\n", - "outputs.grade.histogram.(70, 75] 181\n", - "outputs.grade.histogram.(75, 80] 227\n", - "outputs.grade.histogram.(80, 85] 248\n", - "outputs.grade.histogram.(85, 90] 204\n", - "outputs.grade.histogram.(90, 95] 110\n", - "outputs.grade.histogram.(95, inf] 0\n", - "outputs.class_year.mean 1.81257\n", - "outputs.class_year.histogram.(-inf, 1] 420\n", - "outputs.class_year.histogram.(1, 2] 311\n", - "outputs.class_year.histogram.(2, 3] 80\n", - "outputs.class_year.histogram.(3, inf] 47" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from pandas import json_normalize\n", - "\n", - "json_normalize(release).transpose()" - ] } ], "metadata": {