Skip to content

Commit

Permalink
Link table of contents within the PDF (#577)
Browse files Browse the repository at this point in the history
* part of #554

* little more for #554

Beginning of adding setup questions to the PDF

* include setup questions as part of the release JSON #554

* switch branch to TOC only and leave PDF questions for later #576
  • Loading branch information
raprasad committed Mar 25, 2022
1 parent 867b75e commit 0b6f27f
Show file tree
Hide file tree
Showing 9 changed files with 413 additions and 39 deletions.
10 changes: 10 additions & 0 deletions server/opendp_apps/analysis/release_info_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from django.template.loader import render_to_string

from opendp_apps.analysis.models import ReleaseInfo
from opendp_apps.analysis.setup_question_formatter import SetupQuestionFormatter
from opendp_apps.dataset.dataset_formatter import DataSetFormatter
from opendp_apps.model_helpers.basic_err_check import BasicErrCheck

Expand Down Expand Up @@ -78,6 +79,14 @@ def build_release_data(self):
else:
dataset_dict = ds_formatter.get_formatted_info()

# depositor setup questions
setup_questions = None
depositor_info = self.dataset.depositor_setup_info
if depositor_info:
setup_formatter = SetupQuestionFormatter(depositor_info)
if not setup_formatter.has_error():
setup_questions = setup_formatter.as_dict()

self.release_dict = OrderedDict({
"name": str(self.release_util.analysis_plan),
# "release_url": None, # via with https://github.com/opendp/dpcreator/issues/34
Expand All @@ -94,6 +103,7 @@ def build_release_data(self):
"version": self.release_util.opendp_version,
},
"dataset": dataset_dict,
"setup_questions": setup_questions,
"statistics": self.release_util.get_release_stats()
})

Expand Down
99 changes: 99 additions & 0 deletions server/opendp_apps/analysis/setup_question_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""
Note: Will need some redoing when dataset_questions and epsilon_questions are "collapsed" into one variable
- e.g. https://github.com/opendp/dpcreator/issues/440
Translate the depositor setup questions into JSON for use in the release
Example output:
- DepositorSetupInfo.dataset_questions
- {"radio_best_describes": "notHarmButConfidential",
"radio_only_one_individual_per_row": "yes",
"radio_depend_on_private_information": "yes"}
- DepositorSetupInfo.epsilon_questions
- {"secret_sample": "yes",
"population_size": "1000000",
"observations_number_can_be_public": "yes"}
"""
from __future__ import annotations
import json
from django.core.serializers.json import DjangoJSONEncoder

from opendp_apps.analysis.models import DepositorSetupInfo
from opendp_apps.analysis import static_vals as astatic
from opendp_apps.model_helpers.basic_err_check import BasicErrCheck

class SetupQuestionFormatter(BasicErrCheck):
    """Build an ordered list of question/answer dicts from a DepositorSetupInfo

    The answers held in `dataset_questions` and `epsilon_questions` are merged
    and then reported in the order given by `astatic.SETUP_QUESTION_LIST`.
    The result is available through `as_dict()` or `as_json()`.
    """

    def __init__(self, depositor_setup_info: DepositorSetupInfo):
        """Store the setup info and format it right away"""
        self.dsetup_info = depositor_setup_info
        self.formatted_questions = []

        self.format_info()

    def format_info(self):
        """Fill `self.formatted_questions` with one dict per setup question

        Each dict has the keys: question_num (1-based position in the question list),
        text, attribute, answer and context. Extra keys are added for the
        "best describes" question (longAnswer, privacy_params) and, when the
        secret sample answer is "yes", for the population size.
        """
        if self.has_error():
            return

        # Merge both groups of answers; epsilon answers win on a key collision
        answers = {}
        for section in (self.dsetup_info.dataset_questions,
                        self.dsetup_info.epsilon_questions):
            if section:
                answers = dict(answers, **section)

        for position, attr_name in enumerate(astatic.SETUP_QUESTION_LIST, start=1):
            answer = answers.get(attr_name, '(not answered)')

            # Lookup entries are (question text, context) pairs
            lookup_entry = astatic.SETUP_QUESTION_LOOKUP.get(attr_name)
            question_text, question_context = lookup_entry if lookup_entry else (None, None)

            entry = {
                'question_num': position,
                'text': question_text,
                'attribute': attr_name,
                'answer': answer,
                'context': question_context,
            }

            # This question's answer also maps to a long description and privacy parameters
            if attr_name == astatic.SETUP_Q_02_ATTR:
                answer_detail = astatic.SETUP_Q_02_ANSWERS.get(answer)
                if answer_detail and len(answer_detail) == 2:
                    entry['longAnswer'], entry['privacy_params'] = answer_detail

            # Population size is only reported for a "yes" on the secret sample question
            if attr_name == astatic.SETUP_Q_04_ATTR and answer == 'yes':
                entry[astatic.SETUP_Q_04a_ATTR] = answers.get(astatic.SETUP_Q_04a_ATTR)

            self.formatted_questions.append(entry)

    def as_json(self):
        """Return the formatted questions as an indented JSON string, or None on error"""
        if not self.has_error():
            return json.dumps(self.formatted_questions, cls=DjangoJSONEncoder, indent=4)

        return None

    def as_dict(self):
        """Return the list of formatted question dicts, or None on error"""
        if not self.has_error():
            return self.formatted_questions

        return None

"""
docker-compose run server python manage.py shell
from opendp_apps.analysis.models import DepositorSetupInfo
from opendp_apps.analysis.setup_question_formatter import SetupQuestionFormatter
d = DepositorSetupInfo.objects.first()
setup = SetupQuestionFormatter(d)
print(setup.as_json())
"""
66 changes: 65 additions & 1 deletion server/opendp_apps/analysis/static_vals.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from decimal import Decimal

NOISE_GEOMETRIC_MECHANISM = 'Geometric'
NOISE_LAPLACE_MECHANISM = 'Laplace'
Expand Down Expand Up @@ -70,11 +71,14 @@
MISSING_VAL_INSERT_RANDOM: "Insert Random Value",
MISSING_VAL_INSERT_FIXED: "Insert Fixed Value",
}


def missing_val_label(missing_val_type):
    """Return the display label for a type of missing value handling

    :param missing_val_type: a key of MISSING_VAL_HANDING_LABELS
    :return: the label stored for that key
    :raises AssertionError: if missing_val_type is not a known key
    """
    # Raised explicitly instead of using an `assert` statement so the check
    # still runs under `python -O`; the exception type is kept as
    # AssertionError so existing callers see the same error.
    if missing_val_type not in MISSING_VAL_HANDING_LABELS:
        raise AssertionError(f"The type of missing value is unknown! {missing_val_type}")

    return MISSING_VAL_HANDING_LABELS[missing_val_type]


# --------------------------------------
# Error Messages
# --------------------------------------
Expand Down Expand Up @@ -107,4 +111,64 @@ def missing_val_label(missing_val_type):
# Deposit error messages (user-facing text)
ERR_MSG_DEPOSIT_NO_JSON_FILE = 'A JSON file is not available for deposit.'
ERR_MSG_DEPOSIT_NO_PDF_FILE = 'A PDF file is not available for deposit.'
ERR_MSG_DEPOSIT_NOT_DATAVERSE = 'Deposit functionality is not available for a non-Dataverse file'
ERR_MSG_DEPOSIT_NO_DV_USER = 'The Dataverse user could not be found for this release.'

# --------------------------------------
# Setup Questions
# --------------------------------------
# Each SETUP_Q_NN_ATTR is the key under which an answer is stored.
# Each SETUP_Q_NN_TEXT is a (question text, explanatory context) pair.

SETUP_Q_01_ATTR = 'radio_depend_on_private_information'
SETUP_Q_01_TEXT = ('Does your data file depend on private information of subjects?',
                   'Question to help determine whether differential privacy is appropriate for this data file.')

SETUP_Q_02_ATTR = 'radio_best_describes'
SETUP_Q_02_TEXT = ('Which of the following best describes your data file?',
                   'The answer is used to set privacy parameters (default epsilon and delta values)'
                   ' which may be changed later in the process.')

# Maps each possible answer to SETUP_Q_02 to a pair:
#   (long description of the answer, default privacy parameters or None)
#
# The delta values are powers of ten (10^-5, 10^-6, 10^-7) and are written
# as 1e-N on purpose: `10-5` is integer subtraction (5) and `10e-N` is
# 10 * 10^-N, i.e. ten times larger than 10^-N.
# NOTE(review): confirm these defaults against the values shown to the depositor.
SETUP_Q_02_ANSWERS = dict(
    public=('Public Information', None),
    notHarmButConfidential=(('Information that, if disclosed,'
                             ' would not cause material harm,'
                             ' but which the organization has chosen to keep confidential'),
                            {'epsilon': 1, 'delta': 1e-5}),
    couldCauseHarm=(('Information that could cause risk of material harm to individuals'
                     ' or the organization if disclosed'),
                    {'epsilon': .25, 'delta': 1e-6}),
    wouldLikelyCauseHarm=(('Information that would likely cause serious harm to individuals'
                           ' or the organization if disclosed'),
                          {'epsilon': .05, 'delta': 1e-7}),
    wouldCauseSevereHarm=(('Information that would cause severe harm to individuals or the'
                           ' organization if disclosed. Use of this application is not'
                           ' recommended.'),
                          None),
)


SETUP_Q_03_ATTR = 'radio_only_one_individual_per_row'
SETUP_Q_03_TEXT = ('Does each individual appear in only one row?',
                   'Used to help determine dataset distance.')

SETUP_Q_04_ATTR = 'secret_sample'
SETUP_Q_04_TEXT = ('Is your data a secret and simple random sample from a larger population?',
                   ('If the data is a simple random sample, we can use methods (amplification)'
                    ' to increase the accuracy and utility of the statistics you create.'))

# Follow-up to SETUP_Q_04: only relevant if the SETUP_Q_04_ATTR answer is "yes".
# Unlike the other *_TEXT values this is a plain string, and it is not part of
# SETUP_QUESTION_LOOKUP / SETUP_QUESTION_LIST.
SETUP_Q_04a_ATTR = 'population_size'
SETUP_Q_04a_TEXT = 'Population size'

SETUP_Q_05_ATTR = 'observations_number_can_be_public'
SETUP_Q_05_TEXT = ('Can the number of observations in your data file be made public knowledge?',
                   ('If the data file size can be made public, we don\'t need to spend a portion'
                    ' of your privacy budget to estimate it.'))

# Question attribute -> (question text, context)
SETUP_QUESTION_LOOKUP = {
    SETUP_Q_01_ATTR: SETUP_Q_01_TEXT,
    SETUP_Q_02_ATTR: SETUP_Q_02_TEXT,
    SETUP_Q_03_ATTR: SETUP_Q_03_TEXT,
    SETUP_Q_04_ATTR: SETUP_Q_04_TEXT,
    SETUP_Q_05_ATTR: SETUP_Q_05_TEXT,
}

# Question attributes in the order they are numbered/reported
SETUP_QUESTION_LIST = [SETUP_Q_01_ATTR,
                       SETUP_Q_02_ATTR,
                       SETUP_Q_03_ATTR,
                       SETUP_Q_04_ATTR,
                       SETUP_Q_05_ATTR]
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""
Tests for SetupQuestionFormatter: formatting the depositor setup questions for use in a release
"""
from django.test import TestCase

from opendp_apps.analysis import static_vals as astatic
from opendp_apps.analysis.models import DepositorSetupInfo
from opendp_apps.analysis.setup_question_formatter import SetupQuestionFormatter

from opendp_apps.model_helpers.msg_util import msgt


class TestSetupQuestionFormatter(TestCase):
    """Tests for SetupQuestionFormatter, using DepositorSetupInfo objects built in memory"""

    def setUp(self):
        """Create two DepositorSetupInfo objects: with and without a secret sample"""

        # Set 1: secret sample is "yes", so a population size is supplied
        self.params_01_qs_set1 = {"radio_best_describes": "notHarmButConfidential",
                                  "radio_only_one_individual_per_row": "yes",
                                  "radio_depend_on_private_information": "yes"}
        self.params_01_qs_set2 = {"secret_sample": "yes",
                                  "population_size": "1000000",
                                  "observations_number_can_be_public": "yes"}

        self.deposit_info1 = DepositorSetupInfo(**{'dataset_questions': self.params_01_qs_set1,
                                                   'epsilon_questions': self.params_01_qs_set2})

        # Set 2: secret sample is "no", no population size
        self.params_02_qs_set1 = {"radio_best_describes": "notHarmButConfidential",
                                  "radio_only_one_individual_per_row": "yes",
                                  "radio_depend_on_private_information": "yes"}
        self.params_02_qs_set2 = {"secret_sample": "no",
                                  "observations_number_can_be_public": "yes"}

        self.deposit_info2 = DepositorSetupInfo(**{'dataset_questions': self.params_02_qs_set1,
                                                   'epsilon_questions': self.params_02_qs_set2})

    def test_10_good_format(self):
        """Test that the formatter works correctly"""
        msgt(self.test_10_good_format.__doc__)

        util = SetupQuestionFormatter(self.deposit_info1)
        self.assertFalse(util.has_error())

        fmt_dict = util.as_dict()
        self.assertIsInstance(util.as_json(), str)

        self.assertEqual(len(fmt_dict), 5)

        self.assertEqual(fmt_dict[1]['attribute'], astatic.SETUP_Q_02_ATTR)

        # Compare against the static values rather than a hard-coded literal,
        # so this test checks the formatter and not the specific defaults
        expected_params = astatic.SETUP_Q_02_ANSWERS['notHarmButConfidential'][1]
        self.assertEqual(fmt_dict[1]['privacy_params'], expected_params)

        self.assertEqual(fmt_dict[3]['population_size'], "1000000")

    def test_20_no_secret_sample(self):
        """Test that population size is left out when the data is not a secret sample"""
        msgt(self.test_20_no_secret_sample.__doc__)

        util = SetupQuestionFormatter(self.deposit_info2)
        self.assertFalse(util.has_error())

        fmt_dict = util.as_dict()
        self.assertEqual(len(fmt_dict), 5)

        self.assertEqual(fmt_dict[3]['attribute'], astatic.SETUP_Q_04_ATTR)
        self.assertEqual(fmt_dict[3]['answer'], 'no')
        self.assertNotIn(astatic.SETUP_Q_04a_ATTR, fmt_dict[3])
2 changes: 1 addition & 1 deletion server/opendp_apps/analysis/validate_release_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def make_release_info(self, epsilon_used: float):
pass
else:
# pdf_tasks.run_pdf_report_maker.delay(self.release_info.object_id) # async
report_maker = PDFReportMaker(self.release_info.dp_release)
report_maker = PDFReportMaker(self.release_info.dp_release, self.release_info.object_id)
if not report_maker.has_error():
report_maker.save_pdf_to_release_obj(self.release_info)
# pdf_tasks.run_pdf_report_maker(self.release_info.object_id) # in the loop...
Expand Down
Loading

0 comments on commit 0b6f27f

Please sign in to comment.