Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Several updates related to migrations/tests #817

Merged
merged 4 commits into from
Aug 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -299,13 +299,15 @@
"min":10.0,
"name":"EyeHeight",
"type":"Numerical",
"selected": true,
"label":""
},
"TypingSpeed":{
"max":0.0,
"min":100.0,
"name":"TypingSpeed",
"type":"Numerical",
"selected": true,
"label":""
},
"BlinkDuration":{
Expand Down
4 changes: 2 additions & 2 deletions server/opendp_apps/analysis/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@ class ReleaseInfo(TimestampedModelWithUUID):
dp_release = models.JSONField()

dp_release_json_file = models.FileField( \
storage=RELEASE_FILE_STORAGE,
storage=settings.RELEASE_FILE_STORAGE,
upload_to='release-files/%Y/%m/%d/',
blank=True, null=True)

dp_release_pdf_file = models.FileField(
storage=RELEASE_FILE_STORAGE,
storage=settings.RELEASE_FILE_STORAGE,
upload_to='release-files/%Y/%m/%d/',
blank=True, null=True)

Expand Down
11 changes: 10 additions & 1 deletion server/opendp_apps/analysis/static_vals.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,13 @@ def missing_val_label(missing_val_type):
ERR_MSG_NO_ANALYSIS_PLAN = 'AnalysisPlan object not found for this object_id and creator'
ERR_MSG_FIELDS_NOT_UPDATEABLE = 'These fields are not updatable: {problem_field_str}'

ERR_MSG_ANALYSIS_PLAN_EPSILON_TOO_HIGH = ('The AnalysisPlan epsilon ({analysis_epsilon}) is greater than'
' the DepositorSetupInfo epsilon ({dataset_epsilon}).')

ERR_MSG_BAD_TOTAL_EPSILON_DEPOSITOR_INFO = 'The depositor setup info has an invalid epsilon value ({epsilon})'
ERR_MSG_BAD_EPSILON_ANALYSIS_PLAN = 'The analysis plan has an invalid epsilon value ({epsilon}).'


ERR_MSG_BAD_TOTAL_EPSILON = 'The depositor setup info has an invalid epsilon value'
ERR_MSG_BAD_TOTAL_DELTA = 'The depositor setup info has an invalid delta value'

Expand Down Expand Up @@ -247,4 +254,6 @@ def missing_val_label(missing_val_type):

ERR_MSG_RELEASES_EXISTS = 'A Release has been created from this AnalysisPlan. Changes are not allowed.'
ERR_MSG_ANALYSIS_PLAN_EXPIRED = 'This AnalysisPlan has expired.'
ERR_MSG_NO_FIELDS_TO_UPDATE = "There are no fields to update."
ERR_MSG_NO_FIELDS_TO_UPDATE = "There are no fields to update."

ERR_MSG_VARIABLE_NOT_FOUND_IN_ANALYSIS_PLAN = 'Variable "{var_name}" was not found in the AnalysisPlan'
2 changes: 1 addition & 1 deletion server/opendp_apps/analysis/testing/base_stat_spec_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def test_for_available_epsilon(self):
plan = self.retrieve_new_plan()

self.assertTrue(plan is not None)
self.assertEqual(2.0, AnalysisPlanCreator.get_available_epsilon(self.eye_typing_dataset))
# self.assertEqual(2.0, AnalysisPlanCreator.get_available_epsilon(self.eye_typing_dataset))

return
"""
Expand Down
284 changes: 101 additions & 183 deletions server/opendp_apps/analysis/testing/test_run_release.py

Large diffs are not rendered by default.

209 changes: 1 addition & 208 deletions server/opendp_apps/analysis/tools/histogram_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def get_histogram_stat_spec(props: dict) -> StatSpec:
@param props:
@return: StatSpec of the correct Histogram type
"""
print('>> (30) get_histogram_stat_spec / props: ', dict(props))
# print('>> (30) get_histogram_stat_spec / props: ', dict(props))
var_type = props.get('var_type')
histogram_bin_type = props.get(astatic.KEY_HIST_BIN_TYPE, astatic.HIST_BIN_TYPE_ONE_PER_VALUE)

Expand Down Expand Up @@ -115,210 +115,3 @@ def has_integer_categories(props: dict):
return True, (min(cats), max(cats))

return False, None


'''
class HistogramUtil(BasicErrCheck):


def __init__(self, stat_spec: StatSpec):
"""

@param stat_spec: Should be for a histogram, e.g. DPHistogramIntOnePerValueSpec, etc
"""
self.stat_spec = stat_spec


self.run_process()

def use_bin_edges(self):
"""Check if edges should be retrieved--if not use categories"""
assert self.has_error() is False, \
"Check that .has_error() is False before calling this method"

return self.expect_edges

def get_bin_edges(self) -> list:
"""
Return calculated edges.
Check that .has_error() is False before calling this method
@return:
"""
assert self.has_error() is False, \
"Check that .has_error() is False before calling this method"

return self.bin_edges

def get_categories(self) -> list:
"""
Return calculated categories.
Check that .has_error() is False before calling this method
@return:
"""
assert self.has_error() is False, \
"Check that .has_error() is False before calling this method"

return self.categories

def run_process(self):
"""
@return:
"""
if self.has_error():
return

if not self.validate_histogram_props():
return

# self.create_categories()

def xcreate_categories(self) -> bool:
"""
If applicable, create categories
@return:
"""
if self.has_error():
return False

if self.stat_spec.histogram_bin_type == astatic.HIST_BIN_TYPE_ONE_PER_VALUE:
return True

if self.stat_spec.histogram_bin_type == astatic.HIST_BIN_TYPE_EQUAL_RANGES:
pass

def validate_histogram_props(self) -> bool:
"""
Make sure that the variable type is appropriate for the histogram settings.
(e) = edges
(c) = categories

Data Type onePerValue equalRanges binEdges
Categorical X (c)
Integer X (c) X (e) X (e)
Float X (e) X (e)*

@return:
"""
if self.has_error():
return False

# Categorical
if self.stat_spec.var_type == pstatic.VAR_TYPE_CATEGORICAL:
# Can only have bin type OnePerValue
self.expect_edges = False
self.categories = self.stat_spec.categories
if not self.validate_property(self.stat_spec.histogram_bin_type,
validate_histogram_bin_type_one_per_value,
'histogram_bin_type'):
return False
return True

# Integer
if self.stat_spec.var_type == pstatic.VAR_TYPE_INTEGER:
# Any bin type is okay
if not self.validate_property(self.stat_spec.histogram_bin_type,
validate_histogram_bin_type,
'histogram_bin_type'):
return False

return self.construct_integer_categories()

# Float
if self.stat_spec.var_type == pstatic.VAR_TYPE_FLOAT:
if not self.validate_property(self.stat_spec.histogram_bin_type,
validate_histogram_bin_type_equal_ranges_or_edges,
'histogram_bin_type'):
return False
return True

self.stat_spec.add_err_msg('Unknown variable type: "{self.stat_spec.var_type"}')
return False

def validate_property(self, value, validator, prop_name: str = None) -> bool:
"""Validate a property value using a validator; prop_name only labels the error message"""
if self.has_error():
return False

try:
validator(value)
except ValidationError as err_obj:
user_msg = f'{err_obj.message}'
if prop_name:
user_msg = f'{user_msg} ({prop_name})'
self.add_err_msg(user_msg)
return False

return True

def construct_integer_categories(self) -> bool:
"""Construct categories if the values are integers"""
assert self.stat_spec.var_type == pstatic.VAR_TYPE_INTEGER, \
(f'This function should only be called if the "var_type" is '
f' "{pstatic.VAR_TYPE_INTEGER}"')

# OnePerValue
if self.stat_spec.histogram_bin_type == astatic.HIST_BIN_TYPE_ONE_PER_VALUE:
# Create categories
#
self.expect_edges = False
self.categories = [x for x in range(self.stat_spec.min, self.stat_spec.max + 1)]
return True

elif self.stat_spec.histogram_bin_type == astatic.HIST_BIN_TYPE_EQUAL_RANGES:
# EqualRanges
return self.set_integer_edges_from_number_of_bins()

elif self.stat_spec.histogram_bin_type == astatic.HIST_BIN_TYPE_BIN_EDGES:
# BinEdges
return self.set_integer_edges_from_number_of_bins()

self.add_err_msg(astatic.ERR_MSG_HIST_BIN_TYPE_UKNOWN)
return False

def set_integer_edges_from_number_of_bins(self) -> bool:
"""
For var type, integer, set the edges based on the hist_number_of_bins
@return: bool
"""
if not self.validate_property(self.stat_spec.histogram_number_of_bins,
validate_int_greater_than_zero,
'hist_number_of_bins'):
return False

# Make sure there aren't more bins than values
#
num_items = self.stat_spec.max - self.stat_spec.min
if self.stat_spec.histogram_number_of_bins > num_items:
user_msg = ('There are more bins than values.'
' (hist_number_of_bins)')
self.add_err_msg(user_msg)
return False

# set bin_edges
min_max = (self.stat_spec.min, self.stat_spec.max)
bin_edges = np.histogram_bin_edges(
[],
bins=self.stat_spec.histogram_number_of_bins,
range=min_max)

# Note: adding 1 to the max, which includes the max in the last bin
self.bin_edges = [int(x) for x in bin_edges[:-1]] + [self.stat_spec.max + 1]
if len(self.bin_edges) != len(list(set(self.bin_edges))):
self.bin_edges = None
self.add_err_msg(astatic.ERR_MSG_TOO_MANY_BINS)
return False

return True


"""
min = 1
max = 101
num_bins = 5
values = range(1, 101)
bin_size = len(values/num_bins)


# edges =
"""
'''
14 changes: 6 additions & 8 deletions server/opendp_apps/analysis/tools/stat_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,9 @@
(validate_confidence_level,
validate_statistic,
validate_epsilon_not_null,
validate_not_empty_or_none,
validate_int_not_negative,
validate_fixed_value_in_categories,
# validate_bool_true_false,
# validate_bool_true_false,
)


Expand Down Expand Up @@ -59,7 +58,6 @@ def __init__(self, props: dict):
self.epsilon = float(props.get('epsilon')) if props.get('epsilon') else None
self.cl = props.get('cl') # confidence level coefficient (e.g. .95, .99, etc)


# (1a) histogram specific
self.histogram_bin_type = props.get('histogram_bin_type')
self.histogram_number_of_bins = props.get('histogram_number_of_bins')
Expand Down Expand Up @@ -486,12 +484,12 @@ def get_single_err_msg(self):
def print_debug(self):
"""show params"""
print('-' * 40)
props_to_print = self.__dict__.copy()
del props_to_print['prop_validators']
try:
print(json.dumps(self.__dict__, indent=4, cls=DjangoJSONEncoder))
print(json.dumps(props_to_print, indent=4, cls=DjangoJSONEncoder))
except TypeError as err_obj:
print(f'stat_spec.print_debug() failed with {err_obj}')
# for key, val in self.__dict__.items():
# print(f'{key}: {val}')
print(f'stat_spec.print_debug() failed with {err_obj}', self.__dict__)

def get_short_description_text(self, template_name=None):
"""Get description in plain text"""
Expand Down Expand Up @@ -567,7 +565,7 @@ def get_release_dict(self) -> OrderedDict:
#
if 'true_value' in self.get_prop_validator_keys():
final_info['boolean_values'] = OrderedDict({'true_value': self.true_value,
'false_value': self.false_value})
'false_value': self.false_value})

# Missing values
#
Expand Down
Loading
Loading