Skip to content

Commit

Permalink
Browse files (browse the repository at this point in the history)
  • Loading branch information
akariv committed Jul 13, 2023
1 parent e2ff201 commit b63860a
Show file tree
Hide file tree
Showing 3 changed files with 166 additions and 162 deletions.
319 changes: 163 additions & 156 deletions operators/entities/guidestar/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ def func(rows: ResourceWrapper):
existing_orgs.add(regNum)

branches = ga.branches(regNum)
if len(branches) == 0:
continue
# if len(branches) == 0:
# continue
services = ga.services(regNum)
govServices = dict(
(s['relatedMalkarService'], s) for s in services if s.get('serviceGovName') is not None and s.get('relatedMalkarService') is not None
Expand Down Expand Up @@ -85,163 +85,170 @@ def update_from_taxonomy(names, responses, situations):
}, settings.AIRTABLE_API_KEY),
).process()

def func(row):
if 'data' not in row:
# print('NO DATA', row)
return
data = row['data']

responses = set()
situations = set()

row['name'] = data.pop('serviceName')
row['description'] = data.pop('voluntaryDescription') or data.pop('description')
data_source_url = f'https://www.guidestar.org.il/organization/{data["organization_id"]}/services'
row['data_sources'] = f'מידע נוסף אפשר למצוא ב<a target="_blank" href="{data_source_url}">גיידסטאר - אתר העמותות של ישראל</a>'
orgId = data.pop('organization_id')
actual_branch_ids = data.pop('actual_branch_ids')
row['branches'] = ['guidestar:' + b['branchId'] for b in (data.pop('branches') or []) if b['branchId'] in actual_branch_ids]
if len(row['branches']) == 0:
row['branches'] = ['guidestar:' + bid for bid in actual_branch_ids]

record_type = data.pop('recordType')
assert record_type == 'GreenInfo'
for k in list(data.keys()):
if k.startswith('youth'):
data.pop(k)

relatedMalkarService = data.pop('relatedMalkarService') or {}

update_from_taxonomy([data.pop('serviceTypeName')], responses, situations)
update_from_taxonomy((data.pop('serviceTargetAudience') or '').split(';'), responses, situations)
update_from_taxonomy(['soproc:' + relatedMalkarService.get('serviceGovId', '')], responses, situations)

payment_required = data.pop('paymentMethod')
if payment_required == 'Free service':
row['payment_required'] = 'no'
row['payment_details'] = None
elif payment_required == 'Symbolic cost':
row['payment_required'] = 'yes'
row['payment_details'] = 'עלות סמלית'
elif payment_required == 'Full payment':
row['payment_required'] = 'yes'
row['payment_details'] = 'השירות ניתן בתשלום'
elif payment_required == 'Government funded':
row['payment_required'] = 'yes'
row['payment_details'] = 'השירות מסובסד על ידי הממשלה'
else:
assert False, payment_required + ' ' + repr(row)

service_terms = data.pop('serviceTerms')
if service_terms:
if row.get('payment_details'):
row['payment_details'] += ', ' + service_terms
else:
row['payment_details'] = service_terms

details = []
areas = []
national = False

area = (data.pop('area') or '').split(';')
for item in area:
if item == 'In Branches':
areas.append('בסניפי הארגון')
elif item == 'Country wide':
areas.append('בתיאום מראש ברחבי הארץ')
national = True
elif item == 'Customer Place':
areas.append('בבית הלקוח')
elif item == 'Remote Service':
areas.append('שירות מרחוק')
national = True
elif item == 'Via Phone or Mail':
areas.append('במענה טלפוני, צ׳אט או בדוא"ל')
national = True
elif item == 'Web Service':
areas.append('בשירות אינטרנטי מקוון')
national = True
elif item == 'Customer Appointment':
areas.append('במפגשים קבוצתיים או אישיים')
elif item == 'Program':
areas.append('תוכנית ייעודית בהרשמה מראש')
elif item in ('Not relevant', ''):
pass
def func(rows):
for row in rows:
if 'data' not in row:
# print('NO DATA', row)
yield row
continue

data = row['data']

responses = set()
situations = set()

row['name'] = data.pop('serviceName')
row['description'] = data.pop('voluntaryDescription') or data.pop('description')
data_source_url = f'https://www.guidestar.org.il/organization/{data["organization_id"]}/services'
row['data_sources'] = f'מידע נוסף אפשר למצוא ב<a target="_blank" href="{data_source_url}">גיידסטאר - אתר העמותות של ישראל</a>'
orgId = data.pop('organization_id')
actual_branch_ids = data.pop('actual_branch_ids')
row['branches'] = ['guidestar:' + b['branchId'] for b in (data.pop('branches') or []) if b['branchId'] in actual_branch_ids]
if len(row['branches']) == 0:
row['branches'] = ['guidestar:' + bid for bid in actual_branch_ids]

record_type = data.pop('recordType')
assert record_type == 'GreenInfo'
for k in list(data.keys()):
if k.startswith('youth'):
data.pop(k)

relatedMalkarService = data.pop('relatedMalkarService') or {}

update_from_taxonomy([data.pop('serviceTypeName')], responses, situations)
update_from_taxonomy((data.pop('serviceTargetAudience') or '').split(';'), responses, situations)
update_from_taxonomy(['soproc:' + relatedMalkarService.get('serviceGovId', '')], responses, situations)

payment_required = data.pop('paymentMethod')
if payment_required == 'Free service':
row['payment_required'] = 'no'
row['payment_details'] = None
elif payment_required == 'Symbolic cost':
row['payment_required'] = 'yes'
row['payment_details'] = 'עלות סמלית'
elif payment_required == 'Full payment':
row['payment_required'] = 'yes'
row['payment_details'] = 'השירות ניתן בתשלום'
elif payment_required == 'Government funded':
row['payment_required'] = 'yes'
row['payment_details'] = 'השירות מסובסד על ידי הממשלה'
else:
assert False, 'area {}: {!r}'.format(area, row)

if len(areas) > 1:
details.append('השירות ניתן: ' + ', '.join(areas))
elif len(areas) == 1:
details.append('השירות ניתן ' + ''.join(areas))

if national:
row['branches'] = [f'guidestar:{orgId}:national']

when = data.pop('whenServiceActive')
if when == 'All Year':
details.append('השירות ניתן בכל השנה')
elif when == 'Requires Signup':
details.append('השירות ניתן בהרשמה מראש')
elif when == 'Time Limited':
details.append('השירות מתקיים בתקופה מוגבלת')
elif when == 'Criteria Based':
details.append('השירות ניתן על פי תנאים או קריטריונים')
elif when is None:
pass
else:
assert False, 'when {}: {!r}'.format(when, row)

remoteDelivery = (data.pop('remoteServiceDelivery') or '').split(';')
# Phone, Chat / Email / Whatsapp, Internet, Zoom / Hybrid, Other
methods = []
for item in remoteDelivery:
if item == 'Phone':
methods.append('טלפון')
elif item == 'Chat / Email / Whatsapp':
methods.append('בצ׳אט, דוא"ל או וואטסאפ')
elif item == 'Internet':
methods.append('אתר אינטרנט')
elif item == 'Zoom / Hybrid':
methods.append('בשיחת זום')
elif item == '':
assert False, payment_required + ' ' + repr(row)

service_terms = data.pop('serviceTerms')
if service_terms:
if row.get('payment_details'):
row['payment_details'] += ', ' + service_terms
else:
row['payment_details'] = service_terms

details = []
areas = []
national = False

area = (data.pop('area') or '').split(';')
for item in area:
if item == 'In Branches':
areas.append('בסניפי הארגון')
elif item == 'Country wide':
areas.append('בתיאום מראש ברחבי הארץ')
national = True
elif item == 'Customer Place':
areas.append('בבית הלקוח')
elif item == 'Remote Service':
areas.append('שירות מרחוק')
national = True
elif item == 'Via Phone or Mail':
areas.append('במענה טלפוני, צ׳אט או בדוא"ל')
national = True
elif item == 'Web Service':
areas.append('בשירות אינטרנטי מקוון')
national = True
elif item == 'Customer Appointment':
areas.append('במפגשים קבוצתיים או אישיים')
elif item == 'Program':
areas.append('תוכנית ייעודית בהרשמה מראש')
elif item in ('Not relevant', ''):
pass
else:
assert False, 'area {}: {!r}'.format(area, row)

if len(areas) > 1:
details.append('השירות ניתן: ' + ', '.join(areas))
elif len(areas) == 1:
details.append('השירות ניתן ' + ''.join(areas))

if national:
row['branches'] = [f'guidestar:{orgId}:national']
if len(row['branches']) == 0:
continue

when = data.pop('whenServiceActive')
if when == 'All Year':
details.append('השירות ניתן בכל השנה')
elif when == 'Requires Signup':
details.append('השירות ניתן בהרשמה מראש')
elif when == 'Time Limited':
details.append('השירות מתקיים בתקופה מוגבלת')
elif when == 'Criteria Based':
details.append('השירות ניתן על פי תנאים או קריטריונים')
elif when is None:
pass
else:
assert False, 'remoteDelivery {!r}: {!r}'.format(item, remoteDelivery)

remoteDeliveryOther = data.pop('RemoteServiceDelivery_Other')
if remoteDeliveryOther:
methods.append(remoteDeliveryOther)

if len(methods) > 0:
details.append('שירות מרחוק באמצעות: ' + ', '.join(methods))

if relatedMalkarService:
relatedId = relatedMalkarService.get('serviceGovId')
relatedOffice = relatedMalkarService.get('serviceOffice')
print('GOT RELATED: id={}, office={}'.format(relatedId, relatedOffice))
if relatedId and relatedOffice:
row['implements'] = f'soproc:{relatedId}#{relatedOffice}'

row['details'] = '\n<br/>\n'.join(details)
url = data.pop('url')
url = fix_url(url)
if url:
row['urls'] = f'{url}#מידע נוסף על השירות'

phone_numbers = data.pop('Phone', data.pop('phone', None))
if phone_numbers:
row['phone_numbers'] = phone_numbers

email_address = data.pop('Email', data.pop('email', None))
if email_address:
row['email_address'] = email_address

for k in ('isForCoronaVirus', 'lastModifiedDate', 'serviceId', 'regNum', 'isForBranch'):
data.pop(k)
row['situations'] = sorted(situations)
row['responses'] = sorted(responses)
assert all(v in (None, '0') for v in data.values()), repr(data_source_url) + ':' + repr(data)
assert False, 'when {}: {!r}'.format(when, row)

remoteDelivery = (data.pop('remoteServiceDelivery') or '').split(';')
# Phone, Chat / Email / Whatsapp, Internet, Zoom / Hybrid, Other
methods = []
for item in remoteDelivery:
if item == 'Phone':
methods.append('טלפון')
elif item == 'Chat / Email / Whatsapp':
methods.append('בצ׳אט, דוא"ל או וואטסאפ')
elif item == 'Internet':
methods.append('אתר אינטרנט')
elif item == 'Zoom / Hybrid':
methods.append('בשיחת זום')
elif item == '':
pass
else:
assert False, 'remoteDelivery {!r}: {!r}'.format(item, remoteDelivery)

remoteDeliveryOther = data.pop('RemoteServiceDelivery_Other')
if remoteDeliveryOther:
methods.append(remoteDeliveryOther)

if len(methods) > 0:
details.append('שירות מרחוק באמצעות: ' + ', '.join(methods))

if relatedMalkarService:
relatedId = relatedMalkarService.get('serviceGovId')
relatedOffice = relatedMalkarService.get('serviceOffice')
print('GOT RELATED: id={}, office={}'.format(relatedId, relatedOffice))
if relatedId and relatedOffice:
row['implements'] = f'soproc:{relatedId}#{relatedOffice}'

row['details'] = '\n<br/>\n'.join(details)
url = data.pop('url')
url = fix_url(url)
if url:
row['urls'] = f'{url}#מידע נוסף על השירות'

phone_numbers = data.pop('Phone', data.pop('phone', None))
if phone_numbers:
row['phone_numbers'] = phone_numbers

email_address = data.pop('Email', data.pop('email', None))
if email_address:
row['email_address'] = email_address

for k in ('isForCoronaVirus', 'lastModifiedDate', 'serviceId', 'regNum', 'isForBranch'):
data.pop(k)
row['situations'] = sorted(situations)
row['responses'] = sorted(responses)
assert all(v in (None, '0') for v in data.values()), repr(data_source_url) + ':' + repr(data)
yield row

return DF.Flow(
func,
)
Expand Down
7 changes: 2 additions & 5 deletions operators/manual_data_entry/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,8 @@ def mde_branch_flow():
branches = DF.Flow(
DF.checkpoint(CHECKPOINT),
DF.update_resource(-1, name='branches'),
DF.select_fields(['Org Id', 'Org Name', 'Branch Details', 'Branch Address', 'Branch Geocode',
'Branch Phone Number', 'Branch Email', 'Branch Website', 'Org Website']),
DF.select_fields(['Org Id', 'Branch Details', 'Branch Address', 'Branch Geocode',
'Branch Phone Number', 'Branch Email', 'Branch Website']),
DF.rename_fields({
'Org Id': 'organization',
'Branch Details': 'name',
Expand All @@ -141,7 +141,6 @@ def mde_branch_flow():
'Branch Phone Number': 'phone_numbers',
'Branch Email': 'email_address',
'Branch Website': 'urls',
'Org Website': 'org_urls',
}),
DF.add_field('id', 'string', lambda r: mde_id(r['organization'], r['address'], r['geocode'])),
DF.join_with_self('branches', ['id'], dict(
Expand All @@ -152,7 +151,6 @@ def mde_branch_flow():
phone_numbers=None,
email_address=None,
urls=None,
org_urls=None,
organization=None,
)),
DF.add_field('data', 'object', lambda r: dict(
Expand All @@ -162,7 +160,6 @@ def mde_branch_flow():
phone_numbers=r['phone_numbers'],
email_address=r['email_address'],
urls=r['urls'],
org_urls=r['org_urls'],
organization=[r['organization']],
)),
DF.select_fields(['id', 'data']),
Expand Down
2 changes: 1 addition & 1 deletion srm_tools/guidestar_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def organizations(self, limit=None, regNums=None, filter=True):
includingInactiveMalkars='false',
isDesc='false',
sort='regNum',
filter=f'branchCount>0;servicesCount>0;regNum>{minRegNum}'
filter=f'servicesCount>0;regNum>{minRegNum}'
)
resp = self.to_json(lambda: requests.get(f'{self.BASE}/organizations', params=params, headers=self.headers(), timeout=self.TIMEOUT))
for row in resp:
Expand Down

0 comments on commit b63860a

Please sign in to comment.