forked from nzjs/Automated-Survey123-Reports
-
Notifications
You must be signed in to change notification settings - Fork 0
/
S123ReportAndEmailSubmissions.py
268 lines (221 loc) · 11.9 KB
/
S123ReportAndEmailSubmissions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
import os
import smtplib
from email import encoders
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import urllib
import requests
import json
import sys
import docx
import glob
import datetime
from arcgis.gis import GIS
from arcgis.apps.survey123._survey import SurveyManager, Survey
# --------------------------------------------------------------------------------------------------------------------------
#
# This script takes an input Survey ID (from Survey123), a report template (uploaded to the Survey Reports [Beta] tab), and will do the following:
#
# - Generate .docx reports for any new Survey submissions from the last 24 hours,
# - Save these reports in bulk to AGOL, download them one by one to a location, then remove them from AGOL when finished,
# - Read and extract an email address from each .docx file,
# - Send the relevant .docx file as an attachment to the relevant recipient,
# - Remove the .docx file once the email has sent,
# - Logs the daily results to a txt file in the output folder.
#
# Developed by John Stowell, 2019
#
# Call this script with python "..\S123ReportAndEmailSubmissions.py"
# - eg. in a Unix Cron job or Windows Task Scheduler that runs once a day - since we are always looking for submissions from the last 24 hours
#
# Also, this script generates the KeyError: 'results' but still works due to the try/except/finally block...
# Related to this ESRI bug:
# BUG-000119057 : The Python API 1.5.2 generate_report() method of the arcgis.apps.survey123 module, generates the following error: { KeyError: 'results' }
#
# API docs: https://esri.github.io/arcgis-python-api/apidoc/html/arcgis.apps.survey123.html
#
# --------------------------------------------------------------------------------------------------------------------------
def main():
# Customise the variables below
# --- AGOL information... ---
org = 'https://YOUR-ORGANISATION.maps.arcgis.com'
username = 'ARCGIS ONLINE USERNAME'
password = 'ARCGIS ONLINE PASSWORD'
# --- Survey123 variables... ---
surveyID = 'ID OF SURVEY123 FORM' # ID of desired Survey123 form - a unique ID like 4c1b359c4e294c54a02b262b42413f17
output_folder = r'C:\GISWORK\_tmp\Reports' # Output folder WITHOUT trailing slash. This is also where the log file is stored.
# WHERE_FILTER: Use '1=1' to return all records, or something like {{"where":"<col>='<value>'"} - supports SQL syntax
# Docs for date queries: https://www.esri.com/arcgis-blog/products/api-rest/data-management/querying-feature-services-date-time-queries/
# In our case below, we filter by records created in the last 1 day. This works for us as the script is run on a daily schedule.
where_filter = '{"where":"CreationDate >= CURRENT_TIMESTAMP - INTERVAL \'1\' DAY"}'
utc_offset = '+13:00' # UTC Offset for location (+13 is NZST)
report_title = 'Daily_Export' # Title that will show in S123 recent task list
report_template = 1 # ID of the print template in Survey123 that you want to use (0 = ESRI's sample, 1 = first custom report, 2 = second custom report, etc)
# --- Email SMTP settings... ---
email_user = 'EMAIL ADDRESS' # Eg. user@gmail.com. Requires a valid SMTP-enabled email account (Eg. a Gmail acct with the SMTP settings below)
email_password = 'EMAIL ACCOUNT PASSWORD' # Password for the email account
smtp_server = 'smtp.gmail.com'
smtp_port = 587
# --------------------------------------------------------------------------------------------------------------------------
# Don't edit below this line - unless you know what you are doing :)
# --------------------------------------------------------------------------------------------------------------------------
log = output_folder+"\daily_export_log.txt"
# Create a log file if it doesn't exist
print('', file=open(log, "a+"))
# Date variables for later use
today = datetime.datetime.today()
yesterday = today-datetime.timedelta(1)
# -------------------------------------------------------------
# REPORT GENERATION AND DOWNLOAD PROCESS
# Initialise AGOL login by script
print('--------------------------------------------------------------------------------------------------------------------------', file=open(log, "a"))
print('--- STARTING REPORT GENERATION PROCESS ---', today, file=open(log, "a"))
print('')
print('Initialising session in AGOL', file=open(log, "a"))
print('')
agol_login = GIS(org, username, password)
print('Reading Survey123 information for ID: ',surveyID, file=open(log, "a"))
print('')
surveymgr = SurveyManager(agol_login)
survey = surveymgr.get(surveyID)
# print('Templates available: ',survey.report_templates) # Return all available print templates for the survey
# print('')
template = survey.report_templates[report_template]
print('Selected template: ',template, file=open(log, "a"))
print('')
reportCount = 0
# Try/except/finally block to workaround the KeyError: 'results' bug in the generate_report method
# (Waiting on ESRI to fix this bug.)
try:
print('Generating report(s) for submissions from last 24 hours', file=open(log, "a"))
print('')
## Original Example: survey.generate_report(template, '1=1') #generates report for all features
## API Docu Example: survey.generate_report(report_template: arcgis.gis.Item, where: str = '1=1', utc_offset: str = '+00:00', report_title: str = None, folder_id: str = None)
## Our Example: survey.generate_report(template, '1=1', '+13:00', 'Test_Report_Export')
survey.generate_report(template, where_filter, utc_offset, report_title)
except Exception as e:
print('>> ERROR: KeyError: ',e,' (related to ESRI BUG-000119057)', file=open(log, "a"))
print('>> Continuing...', file=open(log, "a"))
print('')
pass
finally:
print('Downloading relevant report(s) to: ',output_folder, file=open(log, "a"))
print('')
# Find all Microsoft Word doc files in AGOL with "Survey 123" in the tags
for x in survey.reports:
# Find the creation date (Unix epoch) and convert to local time
creationdate = datetime.datetime.fromtimestamp(x.created / 1e3)
# Only find and download AGOL reports created in the last 24 hours
# (this will download reports created manually, as well as ones generated by this script)
if (creationdate > yesterday):
# print('Created epoch ',x.created) # Uncomment datestamps below for testing
# print('Created converted ',creationdate)
# print('Today converted ',today)
# print('Yesterday timedelta ',yesterday)
reportCount += 1
# Only return reports that contain the surveyID in the html code of our description
# This should normally return reports generated with the generate_report() method
if surveyID in x.description:
print('Report desc: ',x.description, file=open(log, "a"))
print('')
id = x.id # Get ID of each Word doc AGOL item
data_item = agol_login.content.get(id)
data_item.download(save_path = output_folder) # Download each Word doc to specified location
data_item.delete() # Delete each Word doc item in AGOL (after download finished/no longer required)
# Finally block end
print('REPORTS GENERATED: ',reportCount, file=open(log, "a"))
print('--- REPORT GENERATION PROCESS - FINISHED ---', file=open(log, "a"))
print('', file=open(log, "a"))
# -------------------------------------------------------------
# EMAIL REPORT TO USERS PROCESS
# Optional - email the report documents to specified email address.
# Now we cycle through the new .docx reports in our output_folder, extract the user email address and send the attachment to the email that was collected with S123
print('--- STARTING EMAIL PROCESS ---', file=open(log, "a"))
sender = email_user
documentCount = 0
print('')
print('Getting list of Word docx files in: ',output_folder, file=open(log, "a"))
# Add all files ending with .docx to a new list
file_list = glob.glob(output_folder+'\*.docx')
print('Files:', file=open(log, "a"))
for file_name in file_list:
documentCount += 1
print(file_name, file=open(log, "a"))
print('')
print('Reading raw table data from Word document(s)', file=open(log, "a"))
print('')
for file_name in file_list:
#print(filename)
# Here we read the tabular data from within our docx report - this is based on the report template that you have created...
# In my case, there's a table with 10 rows, and row 8 happens to have the Email address that was collected by Survey123.
docx_data = readDocxTables(file_name)
# Data[7] happens to be our "Email" row (row 8) in the table within each docx template
#print('Data7: ',data[7])
recipient = [str(v) for k,v in docx_data[7].items()][0]
print('Sending email with attachment to recipient: ',recipient, file=open(log, "a"))
# Initialise the email and create the enclosing (outer) message
outer = MIMEMultipart()
outer['Subject'] = 'Survey Report Attached ' + str(today)
outer['To'] = recipient
outer['From'] = sender
outer.preamble = 'You will not see this in a MIME-aware mail reader.\n'
msg_text = 'From ORGANISATION:\n\nPlease find the attached Survey Report from our recent visit or discussion. This is a copy of the information discussed and collected for your records.\n\nKind regards,\nYOUR ORGANISATION (email@org.com)\n\n\n\nNOTE: This is an automated message, please do not reply.'
# Add the attachment to the message
try:
with open(file_name, 'rb') as fp:
msg = MIMEBase('application', 'octet-stream')
msg.set_payload(fp.read())
encoders.encode_base64(msg)
msg.add_header('Content-Disposition', 'attachment', filename=os.path.basename(file_name))
outer.attach(msg)
outer.attach(MIMEText(msg_text, 'plain')) # or 'html'
except:
print('Unable to open one of the attachments. Error: ', sys.exc_info()[0], file=open(log, "a"))
raise
composed = outer.as_string()
# Send the email via SMTP - we're using Google SMTP servers below
try:
with smtplib.SMTP(smtp_server, smtp_port) as s:
s.ehlo()
s.starttls()
s.ehlo()
s.login(sender, email_password)
s.sendmail(sender, recipient, composed)
s.close()
# Email sent, now let's remove the file so that it isn't sent again the following day
os.remove(file_name)
print('Email sent to recipient and removed file from download location.', file=open(log, "a"))
print('')
except:
print('Unable to send the email. Error: ', sys.exc_info()[0], file=open(log, "a"))
raise
print('DOCUMENTS SENT TO RECIPIENTS: ',documentCount, file=open(log, "a"))
print('--- EMAIL PROCESS - FINISHED --- ', file=open(log, "a"))
print('', file=open(log, "a"))
print('', file=open(log, "a"))
# -------------------------------------------------------------
# OTHER FUNCTIONS
# Function to retrieve raw text only
def readText(filename):
doc = docx.Document(filename)
fullText = []
for para in doc.paragraphs:
fullText.append(para.text)
return '\n'.join(fullText)
## Function to retrieve text from tables
def readDocxTables(filename):
document = docx.Document(filename)
table = document.tables[1] # 0 = logo, 1 = first block
data = []
keys = None
for i, row in enumerate(table.rows):
text = (cell.text for cell in row.cells)
if i == 0:
keys = tuple(text)
continue
row_data = dict(zip(keys, text))
data.append(row_data)
return(data)
if __name__ == '__main__':
main()