From 0b6f27f4bbc1a3f92133d6fddef19cd6d29daf7c Mon Sep 17 00:00:00 2001 From: Raman Prasad Date: Fri, 25 Mar 2022 07:46:58 -0400 Subject: [PATCH] Link table of contents within the PDF (#577) * part of #554 * little more for #554 Beginning of adding setup questions to the PDF * include setup questions as part of the release JSON #554 * switch branch to TOC only and leave PDF questions for later #576 --- .../analysis/release_info_formatter.py | 10 + .../analysis/setup_question_formatter.py | 99 +++++++++ server/opendp_apps/analysis/static_vals.py | 66 +++++- .../testing/test_setup_question_formatter.py | 53 +++++ .../analysis/validate_release_util.py | 2 +- .../dp_reports/pdf_report_maker.py | 202 +++++++++++++++--- server/opendp_apps/dp_reports/static_vals.py | 8 +- server/requirements/base.txt | 2 +- server/scripts_dev/build_pdf.py | 10 +- 9 files changed, 413 insertions(+), 39 deletions(-) create mode 100644 server/opendp_apps/analysis/setup_question_formatter.py create mode 100644 server/opendp_apps/analysis/testing/test_setup_question_formatter.py diff --git a/server/opendp_apps/analysis/release_info_formatter.py b/server/opendp_apps/analysis/release_info_formatter.py index af970454..34f5c700 100644 --- a/server/opendp_apps/analysis/release_info_formatter.py +++ b/server/opendp_apps/analysis/release_info_formatter.py @@ -10,6 +10,7 @@ from django.template.loader import render_to_string from opendp_apps.analysis.models import ReleaseInfo +from opendp_apps.analysis.setup_question_formatter import SetupQuestionFormatter from opendp_apps.dataset.dataset_formatter import DataSetFormatter from opendp_apps.model_helpers.basic_err_check import BasicErrCheck @@ -78,6 +79,14 @@ def build_release_data(self): else: dataset_dict = ds_formatter.get_formatted_info() + # depositor setup questions + setup_questions = None + depositor_info = self.dataset.depositor_setup_info + if depositor_info: + setup_formatter = SetupQuestionFormatter(depositor_info) + if not
setup_formatter.has_error(): + setup_questions = setup_formatter.as_dict() + self.release_dict = OrderedDict({ "name": str(self.release_util.analysis_plan), # "release_url": None, # via with https://github.com/opendp/dpcreator/issues/34 @@ -94,6 +103,7 @@ def build_release_data(self): "version": self.release_util.opendp_version, }, "dataset": dataset_dict, + "setup_questions": setup_questions, "statistics": self.release_util.get_release_stats() }) diff --git a/server/opendp_apps/analysis/setup_question_formatter.py b/server/opendp_apps/analysis/setup_question_formatter.py new file mode 100644 index 00000000..a2beda2a --- /dev/null +++ b/server/opendp_apps/analysis/setup_question_formatter.py @@ -0,0 +1,99 @@ +""" +Note: Will need some redoing when dataset_questions and epsilon_questions are "collapsed" into one variable + - e.g. https://github.com/opendp/dpcreator/issues/440 +Translate the depositor setup questions into JSON for use in the release +Example output: +- DepositorSetupInfo.dataset_questions +- {"radio_best_describes": "notHarmButConfidential", + "radio_only_one_individual_per_row": "yes", + "radio_depend_on_private_information": "yes"} + +- DepositorSetupInfo.epsilon_questions +- {"secret_sample": "yes", + "population_size": "1000000", + "observations_number_can_be_public": "yes"} +""" +from __future__ import annotations +import json +from django.core.serializers.json import DjangoJSONEncoder + +from opendp_apps.analysis.models import DepositorSetupInfo +from opendp_apps.analysis import static_vals as astatic +from opendp_apps.model_helpers.basic_err_check import BasicErrCheck + +class SetupQuestionFormatter(BasicErrCheck): + """Format the setup questions for use in a release""" + + def __init__(self, depositor_setup_info: DepositorSetupInfo): + self.dsetup_info = depositor_setup_info + self.formatted_questions = [] + + self.format_info() + + def format_info(self): + if self.has_error(): + return + + setup_questions = {} + if 
self.dsetup_info.dataset_questions: + setup_questions = dict(setup_questions, **self.dsetup_info.dataset_questions) + + if self.dsetup_info.epsilon_questions: + setup_questions = dict(setup_questions, **self.dsetup_info.epsilon_questions) + + qnum = 0 + for qattr in astatic.SETUP_QUESTION_LIST: + qnum += 1 + if qattr in setup_questions: + val = setup_questions.get(qattr) + else: + val = '(not answered)' + + qinfo = astatic.SETUP_QUESTION_LOOKUP.get(qattr) + if qinfo: + qtext, qcontext = astatic.SETUP_QUESTION_LOOKUP.get(qattr) + else: + qtext = None + qcontext = None + + info = dict(question_num=qnum, + text=qtext, + attribute=qattr, + answer=val, + context=qcontext + ) + + # Population size also given, add it to the info dict + if qattr == astatic.SETUP_Q_02_ATTR: + setup_answer = astatic.SETUP_Q_02_ANSWERS.get(val) + if setup_answer and len(setup_answer) == 2: + info['longAnswer'], info['privacy_params'] = setup_answer + + if qattr == astatic.SETUP_Q_04_ATTR and val == 'yes': + info[astatic.SETUP_Q_04a_ATTR] = setup_questions.get(astatic.SETUP_Q_04a_ATTR) + + self.formatted_questions.append(info) + + def as_json(self): + if self.has_error(): + return None + + return json.dumps(self.formatted_questions, cls=DjangoJSONEncoder, indent=4) + + def as_dict(self): + if self.has_error(): + return None + + return self.formatted_questions + +""" +docker-compose run server python manage.py shell + +from opendp_apps.analysis.models import DepositorSetupInfo +from opendp_apps.analysis.setup_question_formatter import SetupQuestionFormatter + +d = DepositorSetupInfo.objects.first() +setup = SetupQuestionFormatter(d) +print(setup.as_json()) + +""" \ No newline at end of file diff --git a/server/opendp_apps/analysis/static_vals.py b/server/opendp_apps/analysis/static_vals.py index 8c83c8c1..ab43bf8b 100644 --- a/server/opendp_apps/analysis/static_vals.py +++ b/server/opendp_apps/analysis/static_vals.py @@ -1,3 +1,4 @@ +from decimal import Decimal NOISE_GEOMETRIC_MECHANISM = 
'Geometric' NOISE_LAPLACE_MECHANISM = 'Laplace' @@ -70,11 +71,14 @@ MISSING_VAL_INSERT_RANDOM: "Insert Random Value", MISSING_VAL_INSERT_FIXED: "Insert Fixed Value", } + + def missing_val_label(missing_val_type): assert missing_val_type in MISSING_VAL_HANDING_LABELS,\ f"The type of missing value is unknown! {missing_val_type}" return MISSING_VAL_HANDING_LABELS.get(missing_val_type) + # -------------------------------------- # Error Messages # -------------------------------------- @@ -107,4 +111,64 @@ def missing_val_label(missing_val_type): ERR_MSG_DEPOSIT_NO_JSON_FILE = 'A JSON file is not avilable for deposit.' ERR_MSG_DEPOSIT_NO_PDF_FILE = 'A PDF file is not avilable for deposit.' ERR_MSG_DEPOSIT_NOT_DATAVERSE = 'Deposit functionality is not available for a non-Dataverse file' -ERR_MSG_DEPOSIT_NO_DV_USER = 'The Datavese user could not be for this release.' \ No newline at end of file +ERR_MSG_DEPOSIT_NO_DV_USER = 'The Datavese user could not be for this release.' + +# Setup Questions + +SETUP_Q_01_ATTR = 'radio_depend_on_private_information' +SETUP_Q_01_TEXT = ('Does your data file depend on private information of subjects?', + 'Question to help determine whether differential privacy is appropriate for this data file.') + +SETUP_Q_02_ATTR = 'radio_best_describes' +SETUP_Q_02_TEXT = ('Which of the following best describes your data file?', + 'The answer is used to set privacy parameters (default epsilon and delta values)' + ' which may be changed later in the process.') + +SETUP_Q_02_ANSWERS = dict( + public=('Public Information', None), + notHarmButConfidential=(('Information that, if disclosed,' + ' would not cause material harm,' + ' but which the organization has chosen to keep confidential'), + {'epsilon': 1, 'delta': 10-5}), + couldCauseHarm=(('Information that could cause risk of material harm to individuals' + ' or the organization if disclosed'), + {'epsilon': .25, 'delta': 10e-6}), + wouldLikelyCauseHarm=(('Information that would likely cause serious 
harm to individuals' + ' or the organization if disclosed'), + {'epsilon': .05, 'delta': 10e-7}), + wouldCauseSevereHarm=(('Information that would cause severe harm to individuals or the' + ' organization if disclosed. Use of this application is not' + ' recommended.'), + None), + ) + + +SETUP_Q_03_ATTR = 'radio_only_one_individual_per_row' +SETUP_Q_03_TEXT = ('Does each individual appear in only one row?', + 'Used to help determine dataset distance.') + +SETUP_Q_04_ATTR = 'secret_sample' +SETUP_Q_04_TEXT = ('Is your data a secret and simple random sample from a larger population?', + ('If the data is a simple random sample, we can use methods (amplification)' + ' to increase the accuracy and utility of the statistics you create.')) + +SETUP_Q_04a_ATTR = 'population_size' # if SETUP_Q_04_ATTR answer is "yes" +SETUP_Q_04a_TEXT = 'Population size' + +SETUP_Q_05_ATTR = 'observations_number_can_be_public' +SETUP_Q_05_TEXT = ('Can the number of observations in your data file be made public knowledge?', + ('If the data file size can be made public, we don\'t need to spend a portion' + ' of your privacy budget to estimate it.')) + +SETUP_QUESTION_LOOKUP = { + SETUP_Q_01_ATTR: SETUP_Q_01_TEXT, + SETUP_Q_02_ATTR: SETUP_Q_02_TEXT, + SETUP_Q_03_ATTR: SETUP_Q_03_TEXT, + SETUP_Q_04_ATTR: SETUP_Q_04_TEXT, + SETUP_Q_05_ATTR: SETUP_Q_05_TEXT, + } +SETUP_QUESTION_LIST = [SETUP_Q_01_ATTR, + SETUP_Q_02_ATTR, + SETUP_Q_03_ATTR, + SETUP_Q_04_ATTR, + SETUP_Q_05_ATTR] diff --git a/server/opendp_apps/analysis/testing/test_setup_question_formatter.py b/server/opendp_apps/analysis/testing/test_setup_question_formatter.py new file mode 100644 index 00000000..cf5aac52 --- /dev/null +++ b/server/opendp_apps/analysis/testing/test_setup_question_formatter.py @@ -0,0 +1,53 @@ +""" +Test of epsilon addition and offsetting floating point anomaly +""" +from django.test import TestCase + +from opendp_apps.analysis import static_vals as astatic +from opendp_apps.analysis.models import 
DepositorSetupInfo +from opendp_apps.analysis.setup_question_formatter import SetupQuestionFormatter + +from opendp_apps.model_helpers.msg_util import msgt + + +class TestSetupQuestionFormatter(TestCase): + + def setUp(self): + + self.params_01_qs_set1 = {"radio_best_describes": "notHarmButConfidential", + "radio_only_one_individual_per_row": "yes", + "radio_depend_on_private_information": "yes"} + self.params_01_qs_set2 = {"secret_sample": "yes", + "population_size": "1000000", + "observations_number_can_be_public": "yes"} + + self.deposit_info1 = DepositorSetupInfo(**{'dataset_questions': self.params_01_qs_set1, + 'epsilon_questions': self.params_01_qs_set2}) + + self.params_02_qs_set1 = {"radio_best_describes": "notHarmButConfidential", + "radio_only_one_individual_per_row": "yes", + "radio_depend_on_private_information": "yes"} + self.params_02_qs_set2 = {"secret_sample": "no", + "observations_number_can_be_public": "yes"} + + self.deposit_info2 = DepositorSetupInfo(**{'dataset_questions': self.params_02_qs_set1, + 'epsilon_questions': self.params_02_qs_set2}) + + + def test_10_good_format(self): + """Test that the formatter works correctly""" + msgt(self.test_10_good_format.__doc__) + + util = SetupQuestionFormatter(self.deposit_info1) + + fmt_dict = util.as_dict() + print(util.as_json()) + + self.assertEqual(len(fmt_dict), 5) + + self.assertEqual(fmt_dict[1]['attribute'], astatic.SETUP_Q_02_ATTR) + + self.assertEqual(fmt_dict[1]['privacy_params'], + {"epsilon": 1, "delta": 5}) + + self.assertEqual(fmt_dict[3]['population_size'], "1000000") diff --git a/server/opendp_apps/analysis/validate_release_util.py b/server/opendp_apps/analysis/validate_release_util.py index 51eb255a..598aa1ab 100644 --- a/server/opendp_apps/analysis/validate_release_util.py +++ b/server/opendp_apps/analysis/validate_release_util.py @@ -294,7 +294,7 @@ def make_release_info(self, epsilon_used: float): pass else: # pdf_tasks.run_pdf_report_maker.delay(self.release_info.object_id) # async 
- report_maker = PDFReportMaker(self.release_info.dp_release) + report_maker = PDFReportMaker(self.release_info.dp_release, self.release_info.object_id) if not report_maker.has_error(): report_maker.save_pdf_to_release_obj(self.release_info) # pdf_tasks.run_pdf_report_maker(self.release_info.object_id) # in the loop... diff --git a/server/opendp_apps/dp_reports/pdf_report_maker.py b/server/opendp_apps/dp_reports/pdf_report_maker.py index ec929bb8..96b02a38 100644 --- a/server/opendp_apps/dp_reports/pdf_report_maker.py +++ b/server/opendp_apps/dp_reports/pdf_report_maker.py @@ -7,25 +7,29 @@ from decimal import Decimal import io import json - import os, sys from os.path import abspath, dirname, isfile, join -import dateutil import random import typing +import uuid + CURRENT_DIR = dirname(abspath(__file__)) from django.core.files.base import ContentFile from django.template.loader import render_to_string from django.core.serializers.json import DjangoJSONEncoder +from borb.pdf.canvas.layout.annotation.link_annotation import ( + LinkAnnotation, + DestinationType, +) +from borb.pdf.canvas.layout.annotation.remote_go_to_annotation import RemoteGoToAnnotation from borb.pdf.canvas.layout.image.image import Image from borb.pdf.canvas.layout.image.chart import Chart from borb.pdf.canvas.layout.page_layout.multi_column_layout import SingleColumnLayout from borb.pdf.canvas.layout.page_layout.page_layout import PageLayout -from borb.pdf.document import Document -from borb.pdf.page.page import DestinationType +from borb.pdf.document.document import Document from borb.pdf.page.page import Page from borb.pdf.page.page_size import PageSize from borb.pdf.pdf import PDF @@ -42,6 +46,8 @@ import numpy as np import pandas as pd from opendp_apps.analysis import static_vals as astatic +from opendp_apps.analysis.setup_question_formatter import SetupQuestionFormatter + from opendp_apps.model_helpers.basic_err_check import BasicErrCheck from opendp_apps.dp_reports import pdf_preset_text @@ 
-53,13 +59,19 @@ class PDFReportMaker(BasicErrCheck): - USAGE_SECTION_TITLE = '4. Usage / Negative Values' + SECTION_TITLE_01_STATISTICS = '1. Statistics' + SECTION_TITLE_02_DATA_SOURCE ='2. Data Source' + SECTION_TITLE_03_OPENDP_LIB = '3. OpenDP Library' + SECTION_TITLE_04_USAGE = '4. Usage / Negative Values' - def __init__(self, release_dict: dict = None): + def __init__(self, release_dict: dict = None, release_object_id: typing.Union[uuid.uuid4, str] = None): """Initalize with a DP Release as Python dict""" - self.release_dict = copy.deepcopy(release_dict) if not release_dict: self.release_dict = self.get_test_release() + else: + self.release_dict = copy.deepcopy(release_dict) + + self.release_object_id = release_object_id # Used to embed the JSON file contents directly to the PDF file self.release_json_bytes = bytes(json.dumps(self.release_dict, indent=4), encoding="latin1") @@ -83,10 +95,12 @@ def __init__(self, release_dict: dict = None): self.page_cnt = 0 self.pdf_doc: Document = Document() + self.first_page = None self.current_page = None # Page self.layout = None # PageLayout self.creation_date = None + self.intrapage_link_info = [] # [[text to link, pdf_object_ref, pdf_page_num, indent], etc.] 
self.format_release() @@ -111,6 +125,7 @@ def create_pdf(self): self.pdf_doc.add_outline("Statistics", 1, DestinationType.FIT, page_nr=1) self.pdf_doc.add_outline("Data Source", 1, DestinationType.FIT, page_nr=self.page_cnt - 1) + self.add_intra_page_links() # links within the document self.embed_json_release_in_pdf() def save_pdf_to_release_obj(self, release_info_obj: ReleaseInfo): @@ -135,6 +150,24 @@ def save_pdf_to_release_obj(self, release_info_obj: ReleaseInfo): # release_info_obj.save() print(f'File saved to release: {release_info_obj.dp_release_pdf_file}') + def get_embed_json_fname(self): + """Get the name of the JSON file embedded in the PDF""" + if self.release_object_id: + return f'release_data_{self.release_object_id}.json' + + return 'release_data.json' + + def save_intrapage_link(self, _txt_to_link: str, pdf_object_ref, + pdf_page_num: int, indent: int = pdf_static.TOC_L2_LINK_OFFSET): + """ + Collect in-page links to hook up at the end of the process + Note: pdf_page_num input should be 1-indexed. + """ + if self.has_error(): + return + + self.intrapage_link_info.append([_txt_to_link, pdf_object_ref, pdf_page_num, indent]) + def save_pdf_to_file(self, pdf_output_file: str = None): """Save the PDF to a file using the given name. 
Used for debugging.""" if self.has_error(): @@ -162,6 +195,10 @@ def start_new_page(self): # self.add_header_border_logo(self.current_page) + # Keep a pointer to the first page + if self.page_cnt == 1: + self.first_page = self.current_page + def add_to_layout(self, pdf_element): """Add a PDF element to the document""" try: @@ -196,7 +233,8 @@ def get_test_release(): def embed_json_release_in_pdf(self): """Embed the JSON release in the PDF""" - self.pdf_doc.append_embedded_file("release_data.json", self.release_json_bytes) + self.pdf_doc.append_embedded_file(self.get_embed_json_fname(), + self.release_json_bytes) def get_general_stat_result_desc(self, stat_type_formatted, var_name) -> list: """Return the general "Result" description""" @@ -226,7 +264,8 @@ def get_histogram_stat_result_desc(self, stat_type_formatted: str, var_name: str ] return text_chunks - def get_histogram_accuracy_desc(self, stat_type_formatted: str, var_name: str) -> list: + @staticmethod + def get_histogram_accuracy_desc(stat_type_formatted: str, var_name: str) -> list: """Return the general "Result" description""" text_chunks = [ putil.txt_bld('Result (continued).'), @@ -321,7 +360,7 @@ def add_histogram_plot(self, stat_info: dict, var_name: str): # ------------------------------------- # Make a bar lot # ------------------------------------- - fig = MatPlotLibPlot.figure(tight_layout=True, figsize=[8,6]) + fig = MatPlotLibPlot.figure(tight_layout=True, figsize=[8, 6]) ax = fig.add_subplot() # ------------------------------------- @@ -364,7 +403,7 @@ def add_histogram_plot(self, stat_info: dict, var_name: str): if min(hist_vals) < 0: has_negative_values = True ax.axhline(0, color='black', linewidth=0.8, linestyle='--') - #ax.plot([0., 4.5], [0, 0], "k--") + # ax.plot([0., 4.5], [0, 0], "k--") # If there are negative values--except the last "uncategorized value", change the color for idx, patch in enumerate(bar_container_obj.patches[:-1]): @@ -384,7 +423,7 @@ def add_histogram_plot(self, 
stat_info: dict, var_name: str): putil.txt_bld('Negative values.'), putil.txt_reg(f' The histogram contains negative values. For more information on how to use '), putil.txt_reg(f' this data, please see the section'), - putil.txt_bld(f' {self.USAGE_SECTION_TITLE}'), + putil.txt_bld(f' {self.SECTION_TITLE_04_USAGE}'), ] self.add_to_layout(HeterogeneousParagraph(text_chunks, @@ -414,7 +453,7 @@ def add_parameter_info(self, stat_info: dict, stat_type_formatted: str): skip_bounds = False is_dp_count = False if stat_info['statistic'] == astatic.DP_COUNT: - is_dp_count= True + is_dp_count = True skip_bounds = True num_param_table_rows = 4 elif stat_info['variable_type'] == VAR_TYPE_CATEGORICAL: @@ -508,7 +547,7 @@ def add_usage_page(self): self.start_new_page() - self.add_to_layout(putil.txt_subtitle_para(self.USAGE_SECTION_TITLE)) + self.add_to_layout(putil.txt_subtitle_para(self.SECTION_TITLE_04_USAGE)) self.add_to_layout(putil.txt_bld_para('(SOME PLACEHOLDER TEXT FOR NOW)')) @@ -552,8 +591,10 @@ def add_data_source_and_lib(self): tbl_src.add(putil.get_tbl_cell_lft_pad(f'Name', padding=self.indent1)) tbl_src.add(putil.get_tbl_cell_lft_pad(f"{dataset_info['installation']['name']}", padding=0)) + dataverse_url = f"{dataset_info['installation']['url']}" tbl_src.add(putil.get_tbl_cell_lft_pad(f'URL', padding=self.indent1)) - tbl_src.add(putil.get_tbl_cell_lft_pad(f"{dataset_info['installation']['url']}", padding=0)) + dv_url_tbl_cell = putil.get_tbl_cell_lft_pad(dataverse_url, padding=0) + tbl_src.add(dv_url_tbl_cell) # ------------------------------ # Dataverse Dataset information @@ -596,6 +637,11 @@ def add_data_source_and_lib(self): self.set_table_borders_padding(tbl_src) self.add_to_layout(tbl_src) + # Add links + self.current_page.append_annotation(RemoteGoToAnnotation( + dv_url_tbl_cell.get_bounding_box(), + uri=dataverse_url)) + self.add_opendp_lib_info() def add_opendp_lib_info(self): @@ -627,16 +673,30 @@ def add_opendp_lib_info(self): 
tbl_src.add(putil.get_tbl_cell_lft_pad(f'Version', padding=self.indent1)) tbl_src.add(putil.get_tbl_cell_lft_pad(f"{dp_lib_info['version']}", padding=0)) + # Add PyPI info and reference to add link tbl_src.add(putil.get_tbl_cell_lft_pad(f'Python package', padding=self.indent1)) - tbl_src.add(putil.get_tbl_cell_lft_pad('https://pypi.org/project/opendp/', padding=0)) + pypi_tbl_cell = putil.get_tbl_cell_lft_pad(pdf_static.PYPI_OPENDP_URL, padding=0) + tbl_src.add(pypi_tbl_cell) + # Add GitHub repo info and reference to add link + github_repo_url = f"{dp_lib_info['url']}" tbl_src.add(putil.get_tbl_cell_lft_pad(f'GitHub Repository', padding=self.indent1)) - tbl_src.add(putil.get_tbl_cell_lft_pad(f"{dp_lib_info['url']}", padding=0)) + github_tbl_cell = putil.get_tbl_cell_lft_pad(github_repo_url, padding=0) + tbl_src.add(github_tbl_cell) self.set_table_borders_padding(tbl_src) self.add_to_layout(tbl_src) + # Add links + self.current_page.append_annotation(RemoteGoToAnnotation( + pypi_tbl_cell.get_bounding_box(), + uri=pdf_static.PYPI_OPENDP_URL)) + + self.current_page.append_annotation(RemoteGoToAnnotation( + github_tbl_cell.get_bounding_box(), + uri=github_repo_url)) + def add_pdf_title_page(self): """Add the PDF title page""" if self.has_error(): @@ -664,8 +724,8 @@ def add_pdf_title_page(self): font_size=putil.BASIC_FONT_SIZE, multiplied_leading=Decimal(1.75))) - para_attachment = ('Note: If you are using Adobe Acrobat, a JSON version of this data' - ' is attached to this PDF as a file named "release_data.json".') + para_attachment = (f'Note: If you are using Adobe Acrobat, a JSON version of this data' + f' is attached to this PDF as a file named "{self.get_embed_json_fname()}".') self.add_to_layout(Paragraph(para_attachment, font=putil.BASIC_FONT, @@ -677,18 +737,99 @@ def add_pdf_title_page(self): font_size=putil.BASIC_FONT_SIZE, multiplied_leading=Decimal(1.75))) - self.add_to_layout(putil.txt_list_para('1. 
Statistics')) + para_section1_obj = putil.txt_list_para(self.SECTION_TITLE_01_STATISTICS) + self.add_to_layout(para_section1_obj) stat_cnt = 0 + predicted_page_num = 1 # assumes stat-specific info starts on page 2 + for stat_info in self.release_dict['statistics']: - stat_cnt += 1 + stat_cnt += 1 stat_type = 'DP ' + stat_info['statistic'].title() var_name = stat_info['variable'] - self.add_to_layout(putil.txt_list_para(f'1.{stat_cnt}. {var_name} - {stat_type}', Decimal(60))) - self.add_to_layout(putil.txt_list_para('2. Data Source')) - self.add_to_layout(putil.txt_list_para('3. OpenDP Library')) - self.add_to_layout(putil.txt_list_para(self.USAGE_SECTION_TITLE)) - self.add_to_layout(putil.txt_list_para('5. Parameter Definitions')) + toc_text = f'1.{stat_cnt}. {var_name} - {stat_type}' + pdf_para_obj = putil.txt_list_para(toc_text, + pdf_static.TOC_L2_LINK_OFFSET) + self.add_to_layout(pdf_para_obj) + + # for adding links later--when the stats pages exist! + predicted_page_num += 1 + + # add link for "1. Statistics" -- link it to the 1st stat + if predicted_page_num == 2: + self.save_intrapage_link(self.SECTION_TITLE_01_STATISTICS, + para_section1_obj, + predicted_page_num, pdf_static.TOC_L1_LINK_OFFSET) + + # add link for sub statistic. 1.1, 1.2, etc. + # + self.save_intrapage_link(toc_text, pdf_para_obj, predicted_page_num) + if stat_info['statistic'] == astatic.DP_HISTOGRAM: + predicted_page_num += 1 # histograms take two pages + + # Add other TOC links for sections 2 onward + sections_to_add = [self.SECTION_TITLE_02_DATA_SOURCE, + self.SECTION_TITLE_03_OPENDP_LIB, + self.SECTION_TITLE_04_USAGE, + #'5. 
Parameter Definitions' + ] + + for sec_text in sections_to_add: + pdf_para_obj = putil.txt_list_para(sec_text) + self.add_to_layout(pdf_para_obj) + if sec_text != self.SECTION_TITLE_03_OPENDP_LIB: # Sections 2 and 3 are on the same page + predicted_page_num += 1 + self.save_intrapage_link(sec_text, pdf_para_obj, + predicted_page_num, pdf_static.TOC_L1_LINK_OFFSET) + + def add_intra_page_links(self): + """ + Add links from the PDF's first page TOC the other pages. TOC example: + 1. Statistics + 1.1. blinkInterval - DP Mean + 1.2. trial - DP Histogram + 1.3. typingSpeed - DP Variance + 2. Data Source + 3. OpenDP Library + 4. Usage / Negative Values + """ + if self.has_error(): + return + + # Interate through the intra page link info and + # add the links within the PDF + # + for _txt_to_link, pdf_object, page_num, indent in self.intrapage_link_info: + """ + _txt_to_link - used for debugging, it's not needed to make the actual PDF link + pdf_object - the source object to add the link to + page_num - the destination page when the pdf_object is clicked + indent - resize the bounding box used for the link source to better fit the text + """ + # print(f'adding link: {_txt_to_link} {pdf_object}, {page_num}, {indent}') + pdf_page_idx = Decimal(page_num) - Decimal(1) # PDF pages within the doc start with 0 + if pdf_page_idx < 0: # shouldn't happen! + self.add_err_msg((f'pdf_report_maker. Error adding TOC links.' + f'pdf_page_idx was less than 0 {pdf_page_idx}')) + return + bounding_box = pdf_object.get_bounding_box() # return a Rectangle object which will be clickable + + # Move the x value and width closer to the text within the pdf_object + bounding_box.x = bounding_box.x + Decimal(indent) + bounding_box.width = bounding_box.width - Decimal(indent - 10) + + # Move the y value and height to better align with the pdf_object text + bounding_box.y = bounding_box.y - Decimal(3) + bounding_box.height = bounding_box.height + Decimal(8) + + # Add the link to the PDF! 
+ link_annotation = LinkAnnotation( + bounding_box, + page=pdf_page_idx, + destination_type=DestinationType.FIT, + color=HexColor("#ffffff"), # Without this, a black border is placed around the clickable area + ) + self.first_page.append_annotation(link_annotation) @staticmethod def get_layout_box(p: Paragraph) -> Rectangle: @@ -725,8 +866,9 @@ def add_histogram_result_table(self, stat_info: dict, stat_type: str, var_name: # Statistic name and result tbl_result.add(putil.get_tbl_cell_lft_pad(f'DP {stat_type}', padding=0)) categories = stat_info['result']['value']['categories'] - result_text = 'The results, in JSON format, may accessed through the PDF attachemnt "release_data.json"' - if len(categories) == 1: + result_text = (f'The results, in JSON format, may accessed' + f' through the PDF attachment "{self.get_embed_json_fname()}"') + if len(categories) == 1: tbl_result.add(putil.get_tbl_cell_lft_pad(f"(1 bin/category). {result_text}", padding=0)) else: tbl_result.add(putil.get_tbl_cell_lft_pad(f"({len(categories)} bins/categories). 
{result_text}", @@ -905,5 +1047,5 @@ def add_header_border_logo(self, page: Page) -> None: logo_img_obj.layout(page, rect_logo) # Link logo to opendp.org url - page.append_remote_go_to_annotation(logo_img_obj.get_bounding_box(), - uri="https://www.opendp.org") + page.append_annotation(RemoteGoToAnnotation(logo_img_obj.get_bounding_box(), + uri="https://www.opendp.org")) diff --git a/server/opendp_apps/dp_reports/static_vals.py b/server/opendp_apps/dp_reports/static_vals.py index c5a628cd..9c8c36f8 100644 --- a/server/opendp_apps/dp_reports/static_vals.py +++ b/server/opendp_apps/dp_reports/static_vals.py @@ -1,5 +1,11 @@ +from decimal import Decimal from os.path import abspath, dirname, join from pathlib import Path IMAGE_DIR = join(dirname(abspath(__file__)), 'static', 'images') -DPCREATOR_LOGO_PATH = Path(join(IMAGE_DIR, 'dpcreator_logo.png')) \ No newline at end of file +DPCREATOR_LOGO_PATH = Path(join(IMAGE_DIR, 'dpcreator_logo.png')) + +PYPI_OPENDP_URL = 'https://pypi.org/project/opendp/' + +TOC_L1_LINK_OFFSET = Decimal(40) +TOC_L2_LINK_OFFSET = Decimal(60) \ No newline at end of file diff --git a/server/requirements/base.txt b/server/requirements/base.txt index eab6d7be..7ba6848b 100644 --- a/server/requirements/base.txt +++ b/server/requirements/base.txt @@ -46,7 +46,7 @@ channels==3.0.3 channels-redis==3.2.0 # for PDF creation -borb==2.0.18 +borb==2.0.21 matplotlib==3.4.3 # for testing, mocks requests diff --git a/server/scripts_dev/build_pdf.py b/server/scripts_dev/build_pdf.py index 15144630..582db758 100644 --- a/server/scripts_dev/build_pdf.py +++ b/server/scripts_dev/build_pdf.py @@ -1,14 +1,14 @@ -import json from load_django_settings import CURRENT_DIR, TEST_DATA_DIR, load_local_settings load_local_settings() +import uuid from opendp_apps.dp_reports.pdf_report_maker import PDFReportMaker + def make_test_report(): - rm = PDFReportMaker() + rm = PDFReportMaker(None, uuid.uuid4()) rm.save_pdf_to_file() - -if __name__=='__main__': - make_test_report() \ 
No newline at end of file +if __name__ == '__main__': + make_test_report()