Skip to content

Commit

Permalink
Ignoring invalid covenants in exports
Browse files Browse the repository at this point in the history
  • Loading branch information
mikejcorey committed Oct 30, 2024
1 parent 985d404 commit a818aa7
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 18 deletions.
50 changes: 35 additions & 15 deletions apps/deed/management/commands/gather_image_hits.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,20 +80,37 @@ def build_match_report(self, workflow, matching_keys):

report_df['num_terms'] = report_df['matched_terms'].apply(lambda x: len(x.split(',')))

# create special flag for exceptions like "occupied by any" and "death certificate"
if 'occupied by any' in report_df.columns:
print(report_df['occupied by any'].apply(lambda x: self.split_or_1(x)))
# create special flag for terms that count as exceptions only when they are the sole term hit, like "occupied by any" and "citizen"
bad_solo_terms = ['any person of', 'any person other', 'citizen', 'decent', 'descent', 'occupied by any', 'person not of', 'persons not of', 'persons other than', 'used or occupied']

report_df.loc[~report_df['occupied by any'].isna(), 'occupied_count'] = report_df['occupied by any'].apply(lambda x: self.split_or_1(x))
else:
report_df['occupied_count'] = 0
for term in bad_solo_terms:
if term in report_df.columns:
report_df.loc[~report_df[term].isna(), 'bad_solo_count'] = report_df[term].apply(lambda x: self.split_or_1(x))
else:
report_df['bad_solo_count'] = 0

if 'citizen' in report_df.columns:
print(report_df['citizen'].apply(lambda x: self.split_or_1(x)))
# non-racial terms, for example as requested by CC County. If one of these is the only term found, set it as an exception so it can be exported separately
nonracial_terms = ['disorderly persons', 'less than 18 years', 'no children', 'no minor', 'occupy said real property', 'poverty', 'under the age of', 'years of age or older']

report_df.loc[~report_df['citizen'].isna(), 'citizen_count'] = report_df['citizen'].apply(lambda x: self.split_or_1(x))
else:
report_df['citizen_count'] = 0
for term in workflow_special_terms:
if term in report_df.columns:
report_df.loc[~report_df[term].isna(), 'nonracial_term_count'] = report_df[term].apply(lambda x: self.split_or_1(x))
else:
report_df['nonracial_term_count'] = 0

# if 'occupied by any' in report_df.columns:
# print(report_df['occupied by any'].apply(lambda x: self.split_or_1(x)))

# report_df.loc[~report_df['occupied by any'].isna(), 'occupied_count'] = report_df['occupied by any'].apply(lambda x: self.split_or_1(x))
# else:
# report_df['occupied_count'] = 0

# if 'citizen' in report_df.columns:
# print(report_df['citizen'].apply(lambda x: self.split_or_1(x)))

# report_df.loc[~report_df['citizen'].isna(), 'citizen_count'] = report_df['citizen'].apply(lambda x: self.split_or_1(x))
# else:
# report_df['citizen_count'] = 0

report_df['deathcert_count'] = 0
death_certs = ['death certificate', 'certificate of death', 'date of death', 'name of deceased']
Expand All @@ -115,11 +132,14 @@ def build_match_report(self, workflow, matching_keys):
# Set bool_match to True, unless there's a suspect value or combination
report_df['bool_match'] = True
report_df['bool_exception'] = False
report_df.loc[(report_df['num_terms'] == 1) & (report_df['occupied_count'] > 0), 'bool_match'] = False
report_df.loc[(report_df['num_terms'] == 1) & (report_df['occupied_count'] > 0), 'bool_exception'] = True
report_df.loc[(report_df['num_terms'] == 1) & (report_df['bad_solo_count'] > 0), 'bool_match'] = False
report_df.loc[(report_df['num_terms'] == 1) & (report_df['bad_solo_count'] > 0), 'bool_exception'] = True

report_df.loc[(report_df['num_terms'] == 1) & (report_df['nonracial_term_count'] > 0), 'bool_match'] = False
report_df.loc[(report_df['num_terms'] == 1) & (report_df['nonracial_term_count'] > 0), 'bool_exception'] = True

report_df.loc[(report_df['num_terms'] == 1) & (report_df['citizen_count'] > 0), 'bool_match'] = False
report_df.loc[(report_df['num_terms'] == 1) & (report_df['citizen_count'] > 0), 'bool_exception'] = True
# report_df.loc[(report_df['num_terms'] == 1) & (report_df['citizen_count'] > 0), 'bool_match'] = False
# report_df.loc[(report_df['num_terms'] == 1) & (report_df['citizen_count'] > 0), 'bool_exception'] = True

# Death cert is an exception no matter how many other terms found
report_df.loc[report_df['deathcert_count'] > 0, 'bool_match'] = False
Expand Down
2 changes: 1 addition & 1 deletion apps/deed/management/commands/run_term_search_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def extract_match_context(self, match_details, ocr_json):
for line in term['line_nums']:
print(ocr_lines[line]['Text'])
line_texts.append(ocr_lines[line]['Text'])
out_context.append({'term': term, 'line_nums': term['line_nums'], 'lines': line_texts})
out_context.append({'term': term['term'], 'line_nums': term['line_nums'], 'lines': line_texts})
return out_context

def trigger_lambda(self, deedpage_obj):
Expand Down
4 changes: 2 additions & 2 deletions apps/parcel/management/commands/match_parcels.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,10 @@ def tag_matched_parcels(self, workflow):
Parcel.objects.filter(workflow=workflow, bool_covenant=True).update(bool_covenant=False)

print("Tagging bool_covenant=True for matched Parcels on ZooniverseSubjects...")
Parcel.objects.filter(workflow=workflow, zooniversesubject__isnull=False).update(bool_covenant=True)
Parcel.objects.filter(workflow=workflow, zooniversesubject__isnull=False, zooniversesubject__bool_covenant_final=True).update(bool_covenant=True)

print("Tagging bool_covenant=True for matched Parcels on ManualCovenants...")
Parcel.objects.filter(workflow=workflow, manualcovenant__isnull=False).update(bool_covenant=True)
Parcel.objects.filter(workflow=workflow, manualcovenant__isnull=False, manualcovenant__bool_confirmed=True).update(bool_covenant=True)

def write_match_report(self, workflow, bool_local=False, bool_test=False):
fieldnames = ['join_string', 'match', 'subject_id',
Expand Down

0 comments on commit a818aa7

Please sign in to comment.