From 09daeb249acec9f3202d09c6107e8b99ae471e7c Mon Sep 17 00:00:00 2001 From: Raman Prasad Date: Thu, 7 Sep 2023 09:09:44 -0400 Subject: [PATCH] Add json schema for releases (#847) * In process for #812 * for #812 * #812 schema --- .gitignore | 4 +- server/migrate.sh | 1 + .../testing/base_analysis_plan_test.py | 1 - ..._695cb409-0f99-4c50-9f04-e0a94b029ade.json | 511 ++++++++++++++++++ .../opendp_apps/release_schemas/__init__.py | 0 server/opendp_apps/release_schemas/admin.py | 38 ++ server/opendp_apps/release_schemas/apps.py | 6 + .../fixtures/schema_fixture_v0-2-0.json | 317 +++++++++++ server/opendp_apps/release_schemas/forms.py | 43 ++ .../migrations/0001_initial.py | 36 ++ .../release_schemas/migrations/__init__.py | 0 server/opendp_apps/release_schemas/models.py | 87 +++ .../release_schemas/schema_validator.py | 50 ++ .../schemas/schema_v0.2.0.json | 299 ++++++++++ .../snippet_dpcreator_schema_v01.beta.json | 297 ++++++++++ .../release_schemas/serializers.py | 13 + .../release_schemas/static_vals.py | 3 + .../release_schemas/testing/__init__.py | 4 + .../schema_examples/01_invalid_schema.json | 27 + .../release_v0.2.0_test_01.json | 106 ++++ .../testing/test_release_schemas.py | 72 +++ server/opendp_apps/release_schemas/tests.py | 3 + .../opendp_apps/release_schemas/validators.py | 52 ++ server/opendp_apps/release_schemas/views.py | 48 ++ server/opendp_project/settings/base.py | 1 + server/opendp_project/urls.py | 2 + server/requirements/base.txt | 3 +- server/scripts_dev/test_schema.py | 43 ++ 28 files changed, 2064 insertions(+), 3 deletions(-) create mode 100644 server/opendp_apps/dp_reports/test_data/release_695cb409-0f99-4c50-9f04-e0a94b029ade.json create mode 100644 server/opendp_apps/release_schemas/__init__.py create mode 100644 server/opendp_apps/release_schemas/admin.py create mode 100644 server/opendp_apps/release_schemas/apps.py create mode 100644 server/opendp_apps/release_schemas/fixtures/schema_fixture_v0-2-0.json create mode 100644 
server/opendp_apps/release_schemas/forms.py create mode 100644 server/opendp_apps/release_schemas/migrations/0001_initial.py create mode 100644 server/opendp_apps/release_schemas/migrations/__init__.py create mode 100644 server/opendp_apps/release_schemas/models.py create mode 100644 server/opendp_apps/release_schemas/schema_validator.py create mode 100644 server/opendp_apps/release_schemas/schemas/schema_v0.2.0.json create mode 100644 server/opendp_apps/release_schemas/schemas/snippet_dpcreator_schema_v01.beta.json create mode 100644 server/opendp_apps/release_schemas/serializers.py create mode 100644 server/opendp_apps/release_schemas/static_vals.py create mode 100644 server/opendp_apps/release_schemas/testing/__init__.py create mode 100644 server/opendp_apps/release_schemas/testing/schema_examples/01_invalid_schema.json create mode 100644 server/opendp_apps/release_schemas/testing/schema_examples/release_v0.2.0_test_01.json create mode 100644 server/opendp_apps/release_schemas/testing/test_release_schemas.py create mode 100644 server/opendp_apps/release_schemas/tests.py create mode 100644 server/opendp_apps/release_schemas/validators.py create mode 100644 server/opendp_apps/release_schemas/views.py create mode 100644 server/scripts_dev/test_schema.py diff --git a/.gitignore b/.gitignore index 006036cc..ef6228c2 100644 --- a/.gitignore +++ b/.gitignore @@ -175,7 +175,9 @@ server/release-files/* server/scripts_dev/dv_test_creds.py .env !server/opendp_apps/banner_messages/fixtures/*.json -!server/opendp_apps/banner_messages/fixtures/*.json +!server/opendp_apps/release_schemas/fixtures/*.json +!server/opendp_apps/release_schemas/schemas/*.json +!server/opendp_apps/release_schemas/testing/schema_examples/*.json !server/opendp_apps/dataverses/fixtures/*.json !server/opendp_apps/profiler/fixtures/*.json !server/opendp_apps/profiler/testing/test_files/* diff --git a/server/migrate.sh b/server/migrate.sh index 5c4d48b6..aec19ea6 100755 --- a/server/migrate.sh +++ 
b/server/migrate.sh @@ -37,6 +37,7 @@ python /code/server/manage.py loaddata opendp_apps/dataverses/fixtures/terms.jso python /code/server/manage.py loaddata opendp_apps/dataverses/fixtures/test_dataverses_01.json # python /code/server/manage.py loaddata opendp_apps/dataverses/fixtures/test_user_emailaddress.json python /code/server/manage.py loaddata opendp_apps/banner_messages/fixtures/*.json +python /code/server/manage.py loaddata opendp_apps/release_schemas/fixtures/*.json # opendp_apps/banner_messages/fixtures/*.json # opendp_apps/dataset/fixtures/*.json exec "$@" diff --git a/server/opendp_apps/analysis/testing/base_analysis_plan_test.py b/server/opendp_apps/analysis/testing/base_analysis_plan_test.py index 627c5c25..acd6592e 100644 --- a/server/opendp_apps/analysis/testing/base_analysis_plan_test.py +++ b/server/opendp_apps/analysis/testing/base_analysis_plan_test.py @@ -52,4 +52,3 @@ def setUp(self): epsilon=0.25, expiration_date=self.expiration_date_str) # print(json.dumps(self.working_plan_info, indent=4)) - diff --git a/server/opendp_apps/dp_reports/test_data/release_695cb409-0f99-4c50-9f04-e0a94b029ade.json b/server/opendp_apps/dp_reports/test_data/release_695cb409-0f99-4c50-9f04-e0a94b029ade.json new file mode 100644 index 00000000..b196dfae --- /dev/null +++ b/server/opendp_apps/dp_reports/test_data/release_695cb409-0f99-4c50-9f04-e0a94b029ade.json @@ -0,0 +1,511 @@ +{ + "name": "Plan A", + "created": { + "iso": "2023-08-31T19:51:52.589029", + "human_readable": "August 31, 2023 at 19:51:52:589029 ", + "human_readable_date_only": "31 August, 2023" + }, + "application": "DP Creator", + "application_url": "https://github.com/opendp/dpcreator", + "differentially_private_library": { + "name": "OpenDP", + "url": "https://github.com/opendp/opendp", + "version": "0.6.1" + }, + "dataset": { + "type": "upload", + "name": "Teacher Survey", + "fileFormat": "(unknown file type)", + "creator": { + "first_name": "DP", + "last_name": "Analyst", + "email": 
"test_analyst@opendp.org" + }, + "upload_date": { + "iso": "2023-08-31T19:45:39.154207+00:00", + "human_readable": "August 31, 2023 at 19:45:39:154207 UTC", + "human_readable_date_only": "31 August, 2023" + } + }, + "setup_questions": [ + { + "question_num": 1, + "text": "Does your data file depend on private information of subjects?", + "attribute": "radio_depend_on_private_information", + "answer": "yes", + "context": "Question to help determine whether differential privacy is appropriate for this data file." + }, + { + "question_num": 2, + "text": "Which of the following best describes your data file?", + "attribute": "radio_best_describes", + "answer": "notHarmButConfidential", + "context": "The answer is used to set privacy parameters (default epsilon and delta values) which may be changed later in the process.", + "longAnswer": "Information that, if disclosed, would not cause material harm, but which the organization has chosen to keep confidential", + "privacy_params": { + "epsilon": 1, + "delta": 1e-05 + } + }, + { + "question_num": 3, + "text": "Does each individual appear in only one row?", + "attribute": "radio_only_one_individual_per_row", + "answer": "yes", + "context": "Used to help determine dataset distance." + }, + { + "question_num": 4, + "text": "Is your data a secret and simple random sample from a larger population?", + "attribute": "secret_sample", + "answer": "no", + "context": "If the data is a simple random sample, we can use methods (amplification) to increase the accuracy and utility of the statistics you create." + }, + { + "question_num": 5, + "text": "Can the number of observations in your data file be made public knowledge?", + "attribute": "observations_number_can_be_public", + "answer": "yes", + "context": "If the data file size can be made public, we don't need to spend a portion of your privacy budget to estimate it." 
+ } + ], + "statistics": [ + { + "statistic": "histogram", + "variable": "smoking", + "variable_type": "Boolean", + "result": { + "value": { + "categories": [ + 1, + 2, + "uncategorized" + ], + "values": [ + 1355, + 5611, + 51 + ], + "category_value_pairs": [ + [ + 1, + 1355 + ], + [ + 2, + 5611 + ], + [ + "uncategorized", + 51 + ] + ] + } + }, + "noise_mechanism": "Geometric", + "epsilon": 0.21428571428571427, + "delta": null, + "boolean_values": { + "true_value": 1, + "false_value": 2 + }, + "missing_value_handling": { + "type": "insert_fixed", + "fixed_value": "2" + }, + "confidence_level": 0.95, + "confidence_level_alpha": 0.05, + "accuracy": { + "value": 17.21477078586504, + "message": "There is a probability of 95.0% that a count in the DP Histogram will differ from the count in the true Histogram by at most 17.21477078586504 units. Here the units are the same units the variable smoking has in the dataset." + }, + "description": { + "html": "A differentially private Histogram for variable smoking was calculated with the result {'categories': [1, 2, 'uncategorized'], 'values': [1355, 5611, 51], 'category_value_pairs': [(1, 1355), (2, 5611), ('uncategorized', 51)]}. There is a probability of 95.0% that a count in the DP Histogram will differ from the count in the true Histogram by at most 17.21477078586504 units. Here the units are the same units the variable smoking has in the dataset.", + "text": "A differentially private Histogram for variable \"smoking\" was calculated with the result {'categories': [1, 2, 'uncategorized'], 'values': [1355, 5611, 51], 'category_value_pairs': [(1, 1355), (2, 5611), ('uncategorized', 51)]}. There is a probability of 95.0% that a count in the DP Histogram will differ from the count in the true Histogram by at most 17.21477078586504 units. Here the units are the same units the variable smoking has in the dataset." 
+ } + }, + { + "statistic": "mean", + "variable": "age", + "variable_type": "Integer", + "result": { + "value": 37.418067531157185 + }, + "noise_mechanism": "Laplace", + "epsilon": 0.21428571428571427, + "delta": null, + "bounds": { + "min": 20.0, + "max": 75.0 + }, + "missing_value_handling": { + "type": "insert_fixed", + "fixed_value": 42.0 + }, + "confidence_level": 0.95, + "confidence_level_alpha": 0.05, + "accuracy": { + "value": 0.10984351670312595, + "message": "There is a probability of 95.0% that the DP Mean will differ from the true Mean by at most 0.10984351670312595 units. Here the units are the same units the variable age has in the dataset." + }, + "description": { + "html": "A differentially private Mean for variable age was calculated with the result 37.418067531157185. There is a probability of 95.0% that the DP Mean will differ from\nthe true Mean by at most 0.10984351670312595 units. Here the units are the same units\nthe variable age has in the dataset.", + "text": "A differentially private Mean for variable \"age\" was calculated with the result 37.418067531157185. There is a probability of 95.0% that the DP Mean will differ from the true Mean by at most 0.10984351670312595 units. Here the units are the same units the variable age has in the dataset." 
+ } + }, + { + "statistic": "histogram", + "variable": "sourceofstress", + "variable_type": "Integer", + "result": { + "value": { + "categories": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + "uncategorized" + ], + "values": [ + 3839, + 188, + 187, + 405, + 415, + 313, + 510, + 870, + 240, + -7 + ], + "category_value_pairs": [ + [ + 1, + 3839 + ], + [ + 2, + 188 + ], + [ + 3, + 187 + ], + [ + 4, + 405 + ], + [ + 5, + 415 + ], + [ + 6, + 313 + ], + [ + 7, + 510 + ], + [ + 8, + 870 + ], + [ + 9, + 240 + ], + [ + "uncategorized", + -7 + ] + ] + } + }, + "noise_mechanism": "Geometric", + "epsilon": 0.21428571428571427, + "delta": null, + "bounds": { + "min": 1, + "max": 9 + }, + "missing_value_handling": { + "type": "insert_fixed", + "fixed_value": 1 + }, + "confidence_level": 0.95, + "confidence_level_alpha": 0.05, + "accuracy": { + "value": 24.23379863748765, + "message": "There is a probability of 95.0% that a count in the DP Histogram will differ from the count in the true Histogram by at most 24.23379863748765 units. Here the units are the same units the variable sourceofstress has in the dataset." + }, + "description": { + "html": "A differentially private Histogram for variable sourceofstress was calculated with the result {'categories': [1, 2, 3, 4, 5, 6, 7, 8, 9, 'uncategorized'], 'values': [3839, 188, 187, 405, 415, 313, 510, 870, 240, -7], 'category_value_pairs': [(1, 3839), (2, 188), (3, 187), (4, 405), (5, 415), (6, 313), (7, 510), (8, 870), (9, 240), ('uncategorized', -7)]}. There is a probability of 95.0% that a count in the DP Histogram will differ from the count in the true Histogram by at most 24.23379863748765 units. 
Here the units are the same units the variable sourceofstress has in the dataset.", + "text": "A differentially private Histogram for variable \"sourceofstress\" was calculated with the result {'categories': [1, 2, 3, 4, 5, 6, 7, 8, 9, 'uncategorized'], 'values': [3839, 188, 187, 405, 415, 313, 510, 870, 240, -7], 'category_value_pairs': [(1, 3839), (2, 188), (3, 187), (4, 405), (5, 415), (6, 313), (7, 510), (8, 870), (9, 240), ('uncategorized', -7)]}. There is a probability of 95.0% that a count in the DP Histogram will differ from the count in the true Histogram by at most 24.23379863748765 units. Here the units are the same units the variable sourceofstress has in the dataset." + } + }, + { + "statistic": "histogram", + "variable": "highesteducationlevel", + "variable_type": "Integer", + "result": { + "value": { + "categories": [ + 1, + 2, + 3, + 4, + 5, + 6, + "uncategorized" + ], + "values": [ + 30, + 848, + 1368, + 1903, + 1963, + 887, + 2 + ], + "category_value_pairs": [ + [ + 1, + 30 + ], + [ + 2, + 848 + ], + [ + 3, + 1368 + ], + [ + 4, + 1903 + ], + [ + 5, + 1963 + ], + [ + 6, + 887 + ], + [ + "uncategorized", + 2 + ] + ] + } + }, + "noise_mechanism": "Geometric", + "epsilon": 0.21428571428571427, + "delta": null, + "bounds": { + "min": 1, + "max": 6 + }, + "missing_value_handling": { + "type": "insert_fixed", + "fixed_value": 5 + }, + "confidence_level": 0.95, + "confidence_level_alpha": 0.05, + "accuracy": { + "value": 22.341628132982883, + "message": "There is a probability of 95.0% that a count in the DP Histogram will differ from the count in the true Histogram by at most 22.341628132982883 units. Here the units are the same units the variable highesteducationlevel has in the dataset." 
+ }, + "description": { + "html": "A differentially private Histogram for variable highesteducationlevel was calculated with the result {'categories': [1, 2, 3, 4, 5, 6, 'uncategorized'], 'values': [30, 848, 1368, 1903, 1963, 887, 2], 'category_value_pairs': [(1, 30), (2, 848), (3, 1368), (4, 1903), (5, 1963), (6, 887), ('uncategorized', 2)]}. There is a probability of 95.0% that a count in the DP Histogram will differ from the count in the true Histogram by at most 22.341628132982883 units. Here the units are the same units the variable highesteducationlevel has in the dataset.", + "text": "A differentially private Histogram for variable \"highesteducationlevel\" was calculated with the result {'categories': [1, 2, 3, 4, 5, 6, 'uncategorized'], 'values': [30, 848, 1368, 1903, 1963, 887, 2], 'category_value_pairs': [(1, 30), (2, 848), (3, 1368), (4, 1903), (5, 1963), (6, 887), ('uncategorized', 2)]}. There is a probability of 95.0% that a count in the DP Histogram will differ from the count in the true Histogram by at most 22.341628132982883 units. Here the units are the same units the variable highesteducationlevel has in the dataset." 
+ } + }, + { + "statistic": "histogram", + "variable": "optimism", + "variable_type": "Integer", + "result": { + "value": { + "categories": [ + "[6, 13]", + "[14, 21]", + "[22, 30]", + "uncategorized" + ], + "histogram_bin_edges": [ + 6, + 14, + 22, + 31 + ], + "values": [ + 206, + 2876, + 3923, + 2 + ], + "category_value_pairs": [ + [ + "[6, 13]", + 206 + ], + [ + "[14, 21]", + 2876 + ], + [ + "[22, 30]", + 3923 + ], + [ + "uncategorized", + 2 + ] + ] + } + }, + "noise_mechanism": "Geometric", + "epsilon": 0.21428571428571427, + "delta": null, + "bounds": { + "min": 6, + "max": 30 + }, + "missing_value_handling": { + "type": "insert_fixed", + "fixed_value": 15 + }, + "confidence_level": 0.95, + "confidence_level_alpha": 0.05, + "accuracy": { + "value": 19.106941290369804, + "message": "There is a probability of 95.0% that a count in the DP Histogram will differ from the count in the true Histogram by at most 19.106941290369804 units. Here the units are the same units the variable optimism has in the dataset." + }, + "description": { + "html": "A differentially private Histogram for variable optimism was calculated with the result {'categories': ['[6, 13]', '[14, 21]', '[22, 30]', 'uncategorized'], 'histogram_bin_edges': [6, 14, 22, 31], 'values': [206, 2876, 3923, 2], 'category_value_pairs': [('[6, 13]', 206), ('[14, 21]', 2876), ('[22, 30]', 3923), ('uncategorized', 2)]}. There is a probability of 95.0% that a count in the DP Histogram will differ from the count in the true Histogram by at most 19.106941290369804 units. Here the units are the same units the variable optimism has in the dataset.", + "text": "A differentially private Histogram for variable \"optimism\" was calculated with the result {'categories': ['[6, 13]', '[14, 21]', '[22, 30]', 'uncategorized'], 'histogram_bin_edges': [6, 14, 22, 31], 'values': [206, 2876, 3923, 2], 'category_value_pairs': [('[6, 13]', 206), ('[14, 21]', 2876), ('[22, 30]', 3923), ('uncategorized', 2)]}. 
There is a probability of 95.0% that a count in the DP Histogram will differ from the count in the true Histogram by at most 19.106941290369804 units. Here the units are the same units the variable optimism has in the dataset." + } + }, + { + "statistic": "count", + "variable": "maritalstatus", + "variable_type": "Categorical", + "result": { + "value": 7001 + }, + "noise_mechanism": "Geometric", + "epsilon": 0.21428571428571427, + "delta": null, + "missing_value_handling": { + "type": "" + }, + "confidence_level": 0.95, + "confidence_level_alpha": 0.05, + "accuracy": { + "value": 13.980083943251959, + "message": "There is a probability of 95.0% that the DP Count will differ from the true Count by at most 13.980083943251959 units. Here the units are the same units the variable maritalstatus has in the dataset." + }, + "description": { + "html": "A differentially private Count for variable maritalstatus was calculated with the result 7001. There is a probability of 95.0% that the DP Count will differ from\nthe true Count by at most 13.980083943251959 units. Here the units are the same units\nthe variable maritalstatus has in the dataset.", + "text": "A differentially private Count for variable \"maritalstatus\" was calculated with the result 7001. There is a probability of 95.0% that the DP Count will differ from the true Count by at most 13.980083943251959 units. Here the units are the same units the variable maritalstatus has in the dataset." 
+ } + }, + { + "statistic": "histogram", + "variable": "maritalstatus", + "variable_type": "Integer", + "result": { + "value": { + "categories": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + "uncategorized" + ], + "values": [ + 1680, + 590, + 593, + 3014, + 476, + 165, + 387, + 112, + 26 + ], + "category_value_pairs": [ + [ + 1, + 1680 + ], + [ + 2, + 590 + ], + [ + 3, + 593 + ], + [ + 4, + 3014 + ], + [ + 5, + 476 + ], + [ + 6, + 165 + ], + [ + 7, + 387 + ], + [ + 8, + 112 + ], + [ + "uncategorized", + 26 + ] + ] + } + }, + "noise_mechanism": "Geometric", + "epsilon": 0.21428571428571427, + "delta": null, + "bounds": { + "min": 1, + "max": 8 + }, + "missing_value_handling": { + "type": "insert_fixed", + "fixed_value": 2 + }, + "confidence_level": 0.95, + "confidence_level_alpha": 0.05, + "accuracy": { + "value": 23.68414447109119, + "message": "There is a probability of 95.0% that a count in the DP Histogram will differ from the count in the true Histogram by at most 23.68414447109119 units. Here the units are the same units the variable maritalstatus has in the dataset." + }, + "description": { + "html": "A differentially private Histogram for variable maritalstatus was calculated with the result {'categories': [1, 2, 3, 4, 5, 6, 7, 8, 'uncategorized'], 'values': [1680, 590, 593, 3014, 476, 165, 387, 112, 26], 'category_value_pairs': [(1, 1680), (2, 590), (3, 593), (4, 3014), (5, 476), (6, 165), (7, 387), (8, 112), ('uncategorized', 26)]}. There is a probability of 95.0% that a count in the DP Histogram will differ from the count in the true Histogram by at most 23.68414447109119 units. 
Here the units are the same units the variable maritalstatus has in the dataset.", + "text": "A differentially private Histogram for variable \"maritalstatus\" was calculated with the result {'categories': [1, 2, 3, 4, 5, 6, 7, 8, 'uncategorized'], 'values': [1680, 590, 593, 3014, 476, 165, 387, 112, 26], 'category_value_pairs': [(1, 1680), (2, 590), (3, 593), (4, 3014), (5, 476), (6, 165), (7, 387), (8, 112), ('uncategorized', 26)]}. There is a probability of 95.0% that a count in the DP Histogram will differ from the count in the true Histogram by at most 23.68414447109119 units. Here the units are the same units the variable maritalstatus has in the dataset." + } + } + ] +} \ No newline at end of file diff --git a/server/opendp_apps/release_schemas/__init__.py b/server/opendp_apps/release_schemas/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/server/opendp_apps/release_schemas/admin.py b/server/opendp_apps/release_schemas/admin.py new file mode 100644 index 00000000..301708f1 --- /dev/null +++ b/server/opendp_apps/release_schemas/admin.py @@ -0,0 +1,38 @@ +# Register your models here. 
@admin.register(ReleaseInfoSchema)
class ReleaseInfoSchemaAdmin(admin.ModelAdmin):
    """Django admin configuration for ``ReleaseInfoSchema``.

    The change form is driven by ``ReleaseInfoSchemaForm``. Only
    ``version``, ``is_published``, ``schema`` and ``description`` are
    editable; every other displayed field is read-only.
    """
    form = ReleaseInfoSchemaForm
    save_on_top = True

    # --- change-list page ---
    list_display = (
        'version',
        'title',
        'is_published',
        'description',
        'updated',
        'created',
    )
    list_filter = ('is_published',)
    search_fields = ('title',)

    # --- change-form page ---
    # Order in which the fields are laid out on the form.
    fields = [
        'version',
        'title',
        'is_published',
        'schema',
        'description',
        'schema_link',
        'id_link',
        'schema_display',
        'created',
        'updated',
    ]
    # 'sortable_version' is declared read-only here although it is not
    # listed in ``fields`` above, so it is not rendered on the form.
    readonly_fields = (
        'title',
        'sortable_version',
        'schema_link',
        'id_link',
        'schema_display',
        'created',
        'updated',
    )
"OpenDP - DP Creator Schema 0.2.0", + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "created": { + "type": "object", + "properties": { + "iso": { + "type": "string" + }, + "human_readable": { + "type": "string" + }, + "human_readable_date_only": { + "type": "string" + } + }, + "required": [ + "iso", + "human_readable", + "human_readable_date_only" + ] + }, + "application": { + "type": "string" + }, + "application_url": { + "type": "string" + }, + "differentially_private_library": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "url": { + "type": "string" + }, + "version": { + "type": "string" + } + }, + "required": [ + "name", + "url", + "version" + ] + }, + "dataset": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "name": { + "type": "string" + }, + "fileFormat": { + "type": "string" + }, + "creator": { + "type": "object", + "properties": { + "first_name": { + "type": "string" + }, + "last_name": { + "type": "string" + }, + "email": { + "type": "string" + } + }, + "required": [ + "first_name", + "last_name", + "email" + ] + }, + "upload_date": { + "type": "object", + "properties": { + "iso": { + "type": "string" + }, + "human_readable": { + "type": "string" + }, + "human_readable_date_only": { + "type": "string" + } + }, + "required": [ + "iso", + "human_readable", + "human_readable_date_only" + ] + } + }, + "required": [ + "type", + "name", + "fileFormat", + "creator", + "upload_date" + ] + }, + "setup_questions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "question_num": { + "type": "number" + }, + "text": { + "type": "string" + }, + "attribute": { + "type": "string" + }, + "answer": { + "type": "string" + }, + "context": { + "type": "string" + }, + "longAnswer": { + "type": "string" + }, + "privacy_params": { + "type": "object", + "properties": { + "epsilon": { + "type": "number" + }, + "delta": { + "type": "number" + } + }, + "required": [ + 
"epsilon", + "delta" + ] + } + }, + "required": [ + "question_num", + "text", + "attribute", + "answer", + "context" + ] + } + }, + "statistics": { + "type": "array", + "items": { + "type": "object", + "properties": { + "statistic": { + "type": "string" + }, + "variable": { + "type": "string" + }, + "variable_type": { + "type": "string" + }, + "result": { + "type": "object", + "properties": { + "value": {} + }, + "required": [ + "value" + ] + }, + "noise_mechanism": { + "type": "string" + }, + "epsilon": { + "type": "number" + }, + "delta": {}, + "boolean_values": { + "type": "object", + "properties": { + "true_value": { + "type": "number" + }, + "false_value": { + "type": "number" + } + }, + "required": [ + "true_value", + "false_value" + ] + }, + "missing_value_handling": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "fixed_value": {} + }, + "required": [ + "type", + "fixed_value" + ] + }, + "confidence_level": { + "type": "number" + }, + "confidence_level_alpha": { + "type": "number" + }, + "accuracy": { + "type": "object", + "properties": { + "value": { + "type": "number" + }, + "message": { + "type": "string" + } + }, + "required": [ + "value", + "message" + ] + }, + "description": { + "type": "object", + "properties": { + "html": { + "type": "string" + }, + "text": { + "type": "string" + } + }, + "required": [ + "html", + "text" + ] + }, + "bounds": { + "type": "object", + "properties": { + "min": { + "type": "number" + }, + "max": { + "type": "number" + } + }, + "required": [ + "min", + "max" + ] + } + }, + "required": [ + "statistic", + "variable", + "variable_type", + "result", + "noise_mechanism", + "epsilon", + "delta", + "missing_value_handling", + "confidence_level", + "confidence_level_alpha", + "accuracy", + "description" + ] + } + } + }, + "required": [ + "name", + "created", + "application", + "application_url", + "differentially_private_library", + "dataset", + "setup_questions", + "statistics" + ] + }, + 
"schema_link": "https://json-schema.org/draft/2020-12/schema", + "id_link": "http://localhost:8000/api/schema/0.2.0/", + "sortable_version": "v-0000-0002-0000" + } + } +] \ No newline at end of file diff --git a/server/opendp_apps/release_schemas/forms.py b/server/opendp_apps/release_schemas/forms.py new file mode 100644 index 00000000..0383269e --- /dev/null +++ b/server/opendp_apps/release_schemas/forms.py @@ -0,0 +1,43 @@ +from django.core.exceptions import ValidationError +from django.forms import ModelForm +from django.utils.translation import gettext_lazy as _ + +from opendp_apps.release_schemas.models import ReleaseInfoSchema +from opendp_apps.release_schemas.validators import \ + (format_semantic_version) + + +class ReleaseInfoSchemaForm(ModelForm): + class Meta: + model = ReleaseInfoSchema + fields = '__all__' + + def clean(self): + """Validate the data. Check that the version number is in the name""" + version = self.cleaned_data.get("version") + schema = self.cleaned_data.get("schema") + + errors = {} + if version and schema: + schema_version = schema.get('version', None) + if schema_version is None: + errors['schema'] = _(f'The schema requires a "version" key.') + elif not version == schema_version: + errors['version'] = _((f'The version, "{version}", does not match the' + f' schema\'s "version", which has the value' + f' "{schema_version}".')) + + if errors: + raise ValidationError(errors) + + return self.cleaned_data + + def clean_version(self): + """Format the version number""" + data = self.cleaned_data.get('version') + if not data: + raise ValidationError('"version" is required') + + data = format_semantic_version(data) + + return data diff --git a/server/opendp_apps/release_schemas/migrations/0001_initial.py b/server/opendp_apps/release_schemas/migrations/0001_initial.py new file mode 100644 index 00000000..20f7c6d0 --- /dev/null +++ b/server/opendp_apps/release_schemas/migrations/0001_initial.py @@ -0,0 +1,36 @@ +# Generated by Django 4.2 on 
2023-09-06 17:46 + +from django.db import migrations, models +import opendp_apps.release_schemas.validators +import uuid + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='ReleaseInfoSchema', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created', models.DateTimeField(auto_now_add=True)), + ('updated', models.DateTimeField(auto_now=True)), + ('object_id', models.UUIDField(default=uuid.uuid4, editable=False)), + ('version', models.CharField(help_text='Semantic version. Example: 1.4.2', max_length=50, unique=True, validators=[opendp_apps.release_schemas.validators.validate_semantic_version_number], verbose_name='Version number')), + ('title', models.CharField(help_text='(auto-generated) Name of the schema. Include the "Version number" in the name.', max_length=128, unique=True)), + ('is_published', models.BooleanField(default=False)), + ('schema', models.JSONField(help_text='JSON schema', validators=[opendp_apps.release_schemas.validators.validate_json_schema])), + ('schema_link', models.URLField(blank=True, help_text='(auto-filled) Link to the schema, using the keyword "$schema"')), + ('id_link', models.URLField(blank=True, help_text='(auto-filled) Link to the "id", trying the keywords "id" and "$id"')), + ('description', models.TextField(blank=True)), + ('sortable_version', models.CharField(help_text='Auto-populated. 
class ReleaseInfoSchema(TimestampedModelWithUUID):
    """
    Schema Information for a release

    Stores one JSON schema document per semantic ``version``. The fields
    ``title``, ``schema_link``, ``id_link`` and ``sortable_version`` are
    derived from ``schema`` / ``version`` every time ``save()`` runs.
    """
    version = models.CharField(max_length=50,
                               unique=True,
                               verbose_name='Version number',
                               help_text='Semantic version. Example: 1.4.2',
                               validators=[validate_semantic_version_number])

    title = models.CharField(max_length=128,
                             blank=True,
                             help_text='(auto-generated) Value of the schema\'s "title" property.')

    description = models.TextField(blank=True, help_text='Description of the schema')

    is_published = models.BooleanField(default=False)

    schema = models.JSONField(help_text='JSON schema',
                              validators=[validate_json_schema])

    schema_link = models.URLField(blank=True,
                                  help_text=('(auto-filled) Value of the schema\'s'
                                             ' "$schema" property, if available'))

    id_link = models.URLField(blank=True,
                              help_text=('(auto-filled) Value of the schema\'s "$id"'
                                         ' property, if available'))

    sortable_version = models.CharField(max_length=50,
                                        help_text=('(auto-filled) Sortable version number'
                                                   ' of the schema.'))

    class Meta:
        ordering = ['-sortable_version']

    def __str__(self):
        return f'{self.title} ({self.version})'

    def save(self, *args, **kwargs):
        """Refresh the derived fields, then save.

        ``*args`` / ``**kwargs`` are forwarded untouched to the parent
        ``save()``. Django itself calls ``save()`` with keyword arguments
        (for example ``objects.create()`` passes ``force_insert`` and
        ``using``), so the override must accept them.
        """
        self.sortable_version = get_sortable_semantic_version(self.version)
        self.schema_link = self.get_schema_value('$schema', rstatic.SCHEMA_FIELD_NOT_SET)
        self.title = self.get_title()
        self.id_link = self.get_schema_value('$id', rstatic.SCHEMA_FIELD_NOT_SET)

        super().save(*args, **kwargs)

    def schema_display(self):
        """Return the schema as indented JSON text for display in the admin.

        The JSON text is HTML-escaped before being marked safe, so markup
        inside schema strings (titles, descriptions, ...) is shown
        literally instead of being interpreted by the browser.
        """
        # Local import: keeps this block self-contained without touching
        # the module's import section.
        from django.utils.html import format_html

        if self.schema:
            # NOTE(review): the template contains no wrapping element as
            # seen here; if a <pre> wrapper was intended, add it to this
            # template string.
            return format_html("""
{}
""", json.dumps(self.schema, indent=4))

        return '(not available)'

    def get_schema_value(self, key, default=None):
        """Return the top-level schema property ``key``.

        Args:
            key: property name to look up, e.g. ``'$schema'`` or ``'$id'``.
            default: value returned when the property cannot be read.

        Returns:
            The property's value, or ``default`` when the schema is unset,
            is not a JSON object, or has no such key.
        """
        # No mark_safe here: these are plain data values taken from the
        # schema, and wrapping a ``None`` result would turn it into the
        # literal string 'None'.
        if not isinstance(self.schema, dict):
            return default

        return self.schema.get(key, default)

    def get_title(self):
        """Return the schema's "title", or ``'Version <version>'`` as a fallback."""
        return self.get_schema_value('title', f'Version {self.version}')
err_obj: + # import pdb; pdb.set_trace() + self.add_err_msg(err_obj.message) + return diff --git a/server/opendp_apps/release_schemas/schemas/schema_v0.2.0.json b/server/opendp_apps/release_schemas/schemas/schema_v0.2.0.json new file mode 100644 index 00000000..592610ce --- /dev/null +++ b/server/opendp_apps/release_schemas/schemas/schema_v0.2.0.json @@ -0,0 +1,299 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "id": "http://localhost:8000/api/schema/0.2.0/", + "version": "0.2.0", + "title": "OpenDP - DP Creator Schema 0.2.0", + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "created": { + "type": "object", + "properties": { + "iso": { + "type": "string" + }, + "human_readable": { + "type": "string" + }, + "human_readable_date_only": { + "type": "string" + } + }, + "required": [ + "iso", + "human_readable", + "human_readable_date_only" + ] + }, + "application": { + "type": "string" + }, + "application_url": { + "type": "string" + }, + "differentially_private_library": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "url": { + "type": "string" + }, + "version": { + "type": "string" + } + }, + "required": [ + "name", + "url", + "version" + ] + }, + "dataset": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "name": { + "type": "string" + }, + "fileFormat": { + "type": "string" + }, + "creator": { + "type": "object", + "properties": { + "first_name": { + "type": "string" + }, + "last_name": { + "type": "string" + }, + "email": { + "type": "string" + } + }, + "required": [ + "first_name", + "last_name", + "email" + ] + }, + "upload_date": { + "type": "object", + "properties": { + "iso": { + "type": "string" + }, + "human_readable": { + "type": "string" + }, + "human_readable_date_only": { + "type": "string" + } + }, + "required": [ + "iso", + "human_readable", + "human_readable_date_only" + ] + } + }, + "required": [ + "type", + "name", + "fileFormat", + 
"creator", + "upload_date" + ] + }, + "setup_questions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "question_num": { + "type": "number" + }, + "text": { + "type": "string" + }, + "attribute": { + "type": "string" + }, + "answer": { + "type": "string" + }, + "context": { + "type": "string" + }, + "longAnswer": { + "type": "string" + }, + "privacy_params": { + "type": "object", + "properties": { + "epsilon": { + "type": "number" + }, + "delta": { + "type": "number" + } + }, + "required": [ + "epsilon", + "delta" + ] + } + }, + "required": [ + "question_num", + "text", + "attribute", + "answer", + "context" + ] + } + }, + "statistics": { + "type": "array", + "items": { + "type": "object", + "properties": { + "statistic": { + "type": "string" + }, + "variable": { + "type": "string" + }, + "variable_type": { + "type": "string" + }, + "result": { + "type": "object", + "properties": { + "value": {} + }, + "required": [ + "value" + ] + }, + "noise_mechanism": { + "type": "string" + }, + "epsilon": { + "type": "number" + }, + "delta": {}, + "boolean_values": { + "type": "object", + "properties": { + "true_value": { + "type": "number" + }, + "false_value": { + "type": "number" + } + }, + "required": [ + "true_value", + "false_value" + ] + }, + "missing_value_handling": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "fixed_value": {} + }, + "required": [ + "type", + "fixed_value" + ] + }, + "confidence_level": { + "type": "number" + }, + "confidence_level_alpha": { + "type": "number" + }, + "accuracy": { + "type": "object", + "properties": { + "value": { + "type": "number" + }, + "message": { + "type": "string" + } + }, + "required": [ + "value", + "message" + ] + }, + "description": { + "type": "object", + "properties": { + "html": { + "type": "string" + }, + "text": { + "type": "string" + } + }, + "required": [ + "html", + "text" + ] + }, + "bounds": { + "type": "object", + "properties": { + "min": { + "type": 
"number" + }, + "max": { + "type": "number" + } + }, + "required": [ + "min", + "max" + ] + } + }, + "required": [ + "statistic", + "variable", + "variable_type", + "result", + "noise_mechanism", + "epsilon", + "delta", + "missing_value_handling", + "confidence_level", + "confidence_level_alpha", + "accuracy", + "description" + ] + } + } + }, + "required": [ + "name", + "created", + "application", + "application_url", + "differentially_private_library", + "dataset", + "setup_questions", + "statistics" + ] +} \ No newline at end of file diff --git a/server/opendp_apps/release_schemas/schemas/snippet_dpcreator_schema_v01.beta.json b/server/opendp_apps/release_schemas/schemas/snippet_dpcreator_schema_v01.beta.json new file mode 100644 index 00000000..8f9b7b40 --- /dev/null +++ b/server/opendp_apps/release_schemas/schemas/snippet_dpcreator_schema_v01.beta.json @@ -0,0 +1,297 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Generated schema for Root", + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "created": { + "type": "object", + "properties": { + "iso": { + "type": "string" + }, + "human_readable": { + "type": "string" + }, + "human_readable_date_only": { + "type": "string" + } + }, + "required": [ + "iso", + "human_readable", + "human_readable_date_only" + ] + }, + "application": { + "type": "string" + }, + "application_url": { + "type": "string" + }, + "differentially_private_library": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "url": { + "type": "string" + }, + "version": { + "type": "string" + } + }, + "required": [ + "name", + "url", + "version" + ] + }, + "dataset": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "name": { + "type": "string" + }, + "fileFormat": { + "type": "string" + }, + "creator": { + "type": "object", + "properties": { + "first_name": { + "type": "string" + }, + "last_name": { + "type": "string" + }, + "email": { + "type": 
"string" + } + }, + "required": [ + "first_name", + "last_name", + "email" + ] + }, + "upload_date": { + "type": "object", + "properties": { + "iso": { + "type": "string" + }, + "human_readable": { + "type": "string" + }, + "human_readable_date_only": { + "type": "string" + } + }, + "required": [ + "iso", + "human_readable", + "human_readable_date_only" + ] + } + }, + "required": [ + "type", + "name", + "fileFormat", + "creator", + "upload_date" + ] + }, + "setup_questions": { + "type": "array", + "items": { + "type": "object", + "properties": { + "question_num": { + "type": "number" + }, + "text": { + "type": "string" + }, + "attribute": { + "type": "string" + }, + "answer": { + "type": "string" + }, + "context": { + "type": "string" + }, + "longAnswer": { + "type": "string" + }, + "privacy_params": { + "type": "object", + "properties": { + "epsilon": { + "type": "number" + }, + "delta": { + "type": "number" + } + }, + "required": [ + "epsilon", + "delta" + ] + } + }, + "required": [ + "question_num", + "text", + "attribute", + "answer", + "context" + ] + } + }, + "statistics": { + "type": "array", + "items": { + "type": "object", + "properties": { + "statistic": { + "type": "string" + }, + "variable": { + "type": "string" + }, + "variable_type": { + "type": "string" + }, + "result": { + "type": "object", + "properties": { + "value": {} + }, + "required": [ + "value" + ] + }, + "noise_mechanism": { + "type": "string" + }, + "epsilon": { + "type": "number" + }, + "delta": {}, + "boolean_values": { + "type": "object", + "properties": { + "true_value": { + "type": "number" + }, + "false_value": { + "type": "number" + } + }, + "required": [ + "true_value", + "false_value" + ] + }, + "missing_value_handling": { + "type": "object", + "properties": { + "type": { + "type": "string" + }, + "fixed_value": {} + }, + "required": [ + "type", + "fixed_value" + ] + }, + "confidence_level": { + "type": "number" + }, + "confidence_level_alpha": { + "type": "number" + }, + 
"accuracy": { + "type": "object", + "properties": { + "value": { + "type": "number" + }, + "message": { + "type": "string" + } + }, + "required": [ + "value", + "message" + ] + }, + "description": { + "type": "object", + "properties": { + "html": { + "type": "string" + }, + "text": { + "type": "string" + } + }, + "required": [ + "html", + "text" + ] + }, + "bounds": { + "type": "object", + "properties": { + "min": { + "type": "number" + }, + "max": { + "type": "number" + } + }, + "required": [ + "min", + "max" + ] + } + }, + "required": [ + "statistic", + "variable", + "variable_type", + "result", + "noise_mechanism", + "epsilon", + "delta", + "missing_value_handling", + "confidence_level", + "confidence_level_alpha", + "accuracy", + "description" + ] + } + } + }, + "required": [ + "name", + "created", + "application", + "application_url", + "differentially_private_library", + "dataset", + "setup_questions", + "statistics" + ] +} \ No newline at end of file diff --git a/server/opendp_apps/release_schemas/serializers.py b/server/opendp_apps/release_schemas/serializers.py new file mode 100644 index 00000000..a06a3339 --- /dev/null +++ b/server/opendp_apps/release_schemas/serializers.py @@ -0,0 +1,13 @@ +from rest_framework import serializers + +from opendp_apps.release_schemas.models import ReleaseInfoSchema + + +class ReleaseSchemaSerializer(serializers.ModelSerializer): + class Meta: + model = ReleaseInfoSchema + fields = ['schema'] + read_only_fields = ['schema'] + + def to_representation(self, value): + return value.schema diff --git a/server/opendp_apps/release_schemas/static_vals.py b/server/opendp_apps/release_schemas/static_vals.py new file mode 100644 index 00000000..85767ae8 --- /dev/null +++ b/server/opendp_apps/release_schemas/static_vals.py @@ -0,0 +1,3 @@ + + +SCHEMA_FIELD_NOT_SET = '(not set)' \ No newline at end of file diff --git a/server/opendp_apps/release_schemas/testing/__init__.py b/server/opendp_apps/release_schemas/testing/__init__.py new file 
mode 100644 index 00000000..e77c6bed --- /dev/null +++ b/server/opendp_apps/release_schemas/testing/__init__.py @@ -0,0 +1,4 @@ +""" +python manage.py test opendp_apps.release_schemas.testing.test_release_schemas + +""" \ No newline at end of file diff --git a/server/opendp_apps/release_schemas/testing/schema_examples/01_invalid_schema.json b/server/opendp_apps/release_schemas/testing/schema_examples/01_invalid_schema.json new file mode 100644 index 00000000..8bfe9a80 --- /dev/null +++ b/server/opendp_apps/release_schemas/testing/schema_examples/01_invalid_schema.json @@ -0,0 +1,27 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "my json api", + "description": "my json api", + "type": "object", + "properties": { + "my_api_response": { + "type": "object", + "properties": { + "MailboxInfo": { + "type": "array", + "items": { + "type": "object", + "properties": { + "ADSyncLinkEnabled": { + "type": "any" + } + } + } + } + } + } + }, + "required": [ + "response" + ] +} diff --git a/server/opendp_apps/release_schemas/testing/schema_examples/release_v0.2.0_test_01.json b/server/opendp_apps/release_schemas/testing/schema_examples/release_v0.2.0_test_01.json new file mode 100644 index 00000000..fae2c9e2 --- /dev/null +++ b/server/opendp_apps/release_schemas/testing/schema_examples/release_v0.2.0_test_01.json @@ -0,0 +1,106 @@ +{ + "name": "Plan 1", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$ref": "http://localhost:8000/api/schema/0.2.0/", + "version": "0.2.0", + "created": { + "iso": "2023-08-31T17:48:08.111656", + "human_readable": "August 31, 2023 at 17:48:08:111656 ", + "human_readable_date_only": "31 August, 2023" + }, + "application": "DP Creator", + "application_url": "https://github.com/opendp/dpcreator", + "differentially_private_library": { + "name": "OpenDP", + "url": "https://github.com/opendp/opendp", + "version": "0.6.1" + }, + "dataset": { + "type": "upload", + "name": "teacher_survey.csv", + "fileFormat": 
"text/csv", + "creator": { + "first_name": "Jameela", + "last_name": "Baker", + "email": "dp_analyst@some.edu" + }, + "upload_date": { + "iso": "2023-07-17T15:45:21.113000+00:00", + "human_readable": "July 17, 2023 at 15:45:21:113000 UTC", + "human_readable_date_only": "17 July, 2023" + } + }, + "setup_questions": [ + { + "question_num": 1, + "text": "Does your data file depend on private information of subjects?", + "attribute": "radio_depend_on_private_information", + "answer": "yes", + "context": "Question to help determine whether differential privacy is appropriate for this data file." + }, + { + "question_num": 2, + "text": "Which of the following best describes your data file?", + "attribute": "radio_best_describes", + "answer": "notHarmButConfidential", + "context": "The answer is used to set privacy parameters (default epsilon and delta values) which may be changed later in the process.", + "longAnswer": "Information that, if disclosed, would not cause material harm, but which the organization has chosen to keep confidential", + "privacy_params": { + "epsilon": 1, + "delta": 1e-05 + } + }, + { + "question_num": 3, + "text": "Does each individual appear in only one row?", + "attribute": "radio_only_one_individual_per_row", + "answer": "yes", + "context": "Used to help determine dataset distance." + }, + { + "question_num": 4, + "text": "Is your data a secret and simple random sample from a larger population?", + "attribute": "secret_sample", + "answer": "no", + "context": "If the data is a simple random sample, we can use methods (amplification) to increase the accuracy and utility of the statistics you create." + }, + { + "question_num": 5, + "text": "Can the number of observations in your data file be made public knowledge?", + "attribute": "observations_number_can_be_public", + "answer": "yes", + "context": "If the data file size can be made public, we don't need to spend a portion of your privacy budget to estimate it." 
+ } + ], + "statistics": [ + { + "statistic": "mean", + "variable": "age", + "variable_type": "Integer", + "result": { + "value": 37.465849349134174 + }, + "noise_mechanism": "Laplace", + "epsilon": 0.2, + "delta": null, + "bounds": { + "min": 20.0, + "max": 75.0 + }, + "missing_value_handling": { + "type": "insert_fixed", + "fixed_value": 45.0 + }, + "confidence_level": 0.95, + "confidence_level_alpha": 0.05, + "accuracy": { + "value": 0.11768948218192063, + "message": "There is a probability of 95.0% that the DP Mean will differ from the true Mean by at most 0.11768948218192063 units. Here the units are the same units the variable age has in the dataset." + }, + "description": { + "html": "A differentially private Mean for variable age was calculated with the result 37.465849349134174. There is a probability of 95.0% that the DP Mean will differ from\nthe true Mean by at most 0.11768948218192063 units. Here the units are the same units\nthe variable age has in the dataset.", + "text": "A differentially private Mean for variable \"age\" was calculated with the result 37.465849349134174. There is a probability of 95.0% that the DP Mean will differ from the true Mean by at most 0.11768948218192063 units. Here the units are the same units the variable age has in the dataset." 
+ } + } + ] +} \ No newline at end of file diff --git a/server/opendp_apps/release_schemas/testing/test_release_schemas.py b/server/opendp_apps/release_schemas/testing/test_release_schemas.py new file mode 100644 index 00000000..56333239 --- /dev/null +++ b/server/opendp_apps/release_schemas/testing/test_release_schemas.py @@ -0,0 +1,72 @@ +from os.path import abspath, dirname, join + +from django.contrib.auth import get_user_model +from django.test import TestCase +from rest_framework import status + +from opendp_apps.model_helpers.msg_util import msgt + +CURRENT_DIR = dirname(abspath(__file__)) +TEST_DATA_DIR = join(dirname(dirname(dirname(CURRENT_DIR))), 'test_data') + + +class ReleaseSchemasTest(TestCase): + fixtures = ['schema_fixture_v0-2-0.json'] + + VERSION_0_2_0 = '0.2.0' + JSON_SCHEMA_URL = 'https://json-schema.org/draft/2020-12/schema' + + def setUp(self): + # super().setUp() + + self.API_SCHEMA_PREFIX = '/api/schema/' + + # Create a OpenDP User object + # + self.user_obj, _created = get_user_model().objects.get_or_create(username='dp_depositor') + + def check_basic_schema_fields(self, json_info, semantic_version): + """Check basic fields""" + self.assertTrue(type(json_info) is dict) + self.assertTrue(type(semantic_version) is str) + + self.assertEqual(json_info['version'], semantic_version) + self.assertEqual(json_info['$schema'], self.JSON_SCHEMA_URL) + self.assertTrue(json_info['$id'].endswith(semantic_version + '/')) + + def test_010_retrieve_plan_list(self): + """(10) Retrieve schema via API, using list""" + msgt(self.test_010_retrieve_plan_list.__doc__) + + response = self.client.get(self.API_SCHEMA_PREFIX, + content_type='application/json') + + self.assertEqual(response.status_code, status.HTTP_200_OK) + + json_info = response.json() + self.assertEqual(json_info['count'], 1) + self.check_basic_schema_fields(json_info['results'][0], self.VERSION_0_2_0) + + def test_020_retrieve_plan_by_version(self): + """(20) Retrieve schema via API, using version 
number""" + msgt(self.test_020_retrieve_plan_by_version.__doc__) + + schema_url = f'{self.API_SCHEMA_PREFIX}{self.VERSION_0_2_0}/' + response = self.client.get(schema_url, + content_type='application/json') + + self.assertEqual(response.status_code, status.HTTP_200_OK) + + json_info = response.json() + self.check_basic_schema_fields(response.json(), self.VERSION_0_2_0) + + def test_030_retrieve_plan_by_latest(self): + """(30) Retrieve schema via API, using 'latest/' """ + msgt(self.test_030_retrieve_plan_by_latest.__doc__) + + schema_url = f'{self.API_SCHEMA_PREFIX}latest/' + response = self.client.get(schema_url, + content_type='application/json') + + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.check_basic_schema_fields(response.json(), self.VERSION_0_2_0) diff --git a/server/opendp_apps/release_schemas/tests.py b/server/opendp_apps/release_schemas/tests.py new file mode 100644 index 00000000..7ce503c2 --- /dev/null +++ b/server/opendp_apps/release_schemas/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. 
diff --git a/server/opendp_apps/release_schemas/validators.py b/server/opendp_apps/release_schemas/validators.py new file mode 100644 index 00000000..50a6d409 --- /dev/null +++ b/server/opendp_apps/release_schemas/validators.py @@ -0,0 +1,52 @@ +import jsonschema +from django.core.exceptions import ValidationError +from django.utils.translation import gettext_lazy as _ +from packaging.version import Version, InvalidVersion + + +def validate_json_schema(value): + """Validate the JSON schema using the Python library jsonschema""" + try: + jsonschema.Validator.check_schema(value) + except jsonschema.exceptions.SchemaError as err_obj: + raise ValidationError(f'Error in schema: {err_obj.message}') + + required_keys = ['$schema', '$id', 'version', 'title'] + + for req_key in required_keys: + if req_key not in value: + raise ValidationError(f'JSON schema is missing required key: "{req_key}"') + + +def validate_semantic_version_number(value): + """Validate the semantic version number""" + try: + Version(value) + except InvalidVersion: + raise ValidationError( + _("%(value)s is not a valid version"), + params={"value": value}, + ) + + +def get_sortable_semantic_version(version_number): + """Return a sortable version of the semantic version""" + validate_semantic_version_number(version_number) + # + v = Version(version_number) + # + vparts = [v.major, v.minor, v.micro] + vnum = 'v' + for vpart in vparts: + vnum += '-' + str(vpart).zfill(4) + # + return vnum + + +def format_semantic_version(version_number): + """Return the version normalized to "major.minor.micro" format""" + validate_semantic_version_number(version_number) + # + v = Version(version_number) + # + return f'{v.major}.{v.minor}.{v.micro}' diff --git a/server/opendp_apps/release_schemas/views.py b/server/opendp_apps/release_schemas/views.py new file mode 100644 index 00000000..43089986 --- /dev/null +++ b/server/opendp_apps/release_schemas/views.py @@ -0,0 +1,48 @@ +import logging + +from django.conf import settings +from
rest_framework import viewsets +from rest_framework.decorators import action +from rest_framework.response import Response +from rest_framework.generics import get_object_or_404 +from opendp_apps.utils.view_helper import get_json_error +from rest_framework import status + +from opendp_apps.release_schemas.models import ReleaseInfoSchema +from opendp_apps.release_schemas.serializers import ReleaseSchemaSerializer + +logger = logging.getLogger(settings.DEFAULT_LOGGER) + + +class ReleaseSchemaView(viewsets.ModelViewSet): + """ + API endpoint to list and retrieve published release schemas (ReleaseInfoSchema objects). + Schemas are looked up by their "version" field and only GET requests are accepted. + """ + serializer_class = ReleaseSchemaSerializer + lookup_field = 'version' + lookup_value_regex = '[\w.]+' + http_method_names = ['get'] + + def get_queryset(self): + """ + Only published ReleaseInfoSchema objects are available through this endpoint.
+ """ + return ReleaseInfoSchema.objects.filter(is_published=True) + + + @action(detail=False, methods=['GET'], url_path='latest') + def latest(self, request): + """ + Retrieve the JSON schema of the latest published version (first by the model's default ordering) + Example: http://127.0.0.1:8000/api/schema/latest/ + """ + release_schema = ReleaseInfoSchema.objects.filter(is_published=True).first() + if release_schema is not None: + serializer = ReleaseSchemaSerializer(release_schema) # serialize the data + logger.info(f"AnalysisPlan created: {serializer.data}") + return Response(serializer.data, status=status.HTTP_200_OK) + + return Response(get_json_error('A published schema was not found'), + status=status.HTTP_404_NOT_FOUND) + diff --git a/server/opendp_project/settings/base.py b/server/opendp_project/settings/base.py index 3a759efe..91c71d6d 100644 --- a/server/opendp_project/settings/base.py +++ b/server/opendp_project/settings/base.py @@ -67,6 +67,7 @@ 'opendp_apps.terms_of_access', 'opendp_apps.banner_messages', 'opendp_apps.profiler', + 'opendp_apps.release_schemas', 'opendp_apps.dp_reports', ] diff --git a/server/opendp_project/urls.py b/server/opendp_project/urls.py index 6b4e20a4..2630d2f0 100644 --- a/server/opendp_project/urls.py +++ b/server/opendp_project/urls.py @@ -34,6 +34,7 @@ from opendp_apps.dataverses.urls import router as dataverse_router from opendp_apps.dataverses.views.dataverse_file_view import DataverseFileView from opendp_apps.dataverses.views.registered_dataverse_view import RegisteredDataverseView +from opendp_apps.release_schemas.views import ReleaseSchemaView from opendp_apps.terms_of_access.views import TermsOfAccessViewSet, TermsOfAccessAgreementViewSet from opendp_apps.user.views import GoogleLogin from opendp_apps.user.views import UserViewSet @@ -59,6 +60,7 @@ router.register(r'profile', ProfilingViewSet, basename='profile') router.register(r'registered-dvs', RegisteredDataverseView, basename='registered-dvs') 
+router.register(r'schema', ReleaseSchemaView, 
basename='schema') router.register(r'release', ReleaseView, basename='release') router.register(r'release-download', ReleaseFileDownloadView, basename='release-download') diff --git a/server/requirements/base.txt b/server/requirements/base.txt index 4259696c..8387f3e9 100644 --- a/server/requirements/base.txt +++ b/server/requirements/base.txt @@ -16,7 +16,8 @@ djangorestframework>=3.12.2 django-rest-polymorphic>=0.1.9 django-cors-headers==3.6.0 idna==2.10 -jsonschema==4.2.1 +jsonschema==4.19.0 +jsonschema-specifications==2023.7.1 lxml==4.9.1 numpy==1.22.0 opencensus-ext-azure==1.1.3 diff --git a/server/scripts_dev/test_schema.py b/server/scripts_dev/test_schema.py new file mode 100644 index 00000000..b2fdcd88 --- /dev/null +++ b/server/scripts_dev/test_schema.py @@ -0,0 +1,43 @@ +# Basic settings +import os + +os.environ['DJANGO_SETTINGS_MODULE'] = 'opendp_project.settings.development_test' +from load_django_settings import * + +load_local_settings() + +import json +from os.path import abspath, dirname, join +from opendp_apps.release_schemas.schema_validator import SchemaValidator +from opendp_apps.model_helpers.msg_util import msgt + +PROJECT_DIR = dirname(dirname(abspath(__file__))) +SCHEMA_DIR = join(dirname(dirname(abspath(__file__))), + 'opendp_apps', 'release_schemas', 'schemas') +SCHEMA_EXAMPLES_DIR = join(dirname(dirname(abspath(__file__))), + 'opendp_apps', 'release_schemas', 'testing', 'schema_examples') + + +def test_schema(): + """Test the schema""" + schema_file = join(SCHEMA_DIR, 'schema_v0.2.0.json') + #schema_file = join(SCHEMA_DIR, 'snippet_dpcreator_schema_v01.beta.json') + schema_example_file = join(SCHEMA_EXAMPLES_DIR, 'release_v0.2.0_test_01.json') + + with open(schema_file, 'r') as in_file: + schema_dict = json.load(in_file) + + with open(schema_example_file, 'r') as in_file: + release_info = json.load(in_file) + + validator = SchemaValidator(schema_dict, release_info) + #validator = SchemaValidator({}, release_info) + if 
validator.has_error(): + msgt(f'validator.errors: {validator.get_err_msg()}') + # print('validator.warnings: ', validator.warnings) + else: + print('No errors!') + + +if __name__ == '__main__': + test_schema()