-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathviews.py
319 lines (271 loc) · 12.7 KB
/
views.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
import base64
import json
import logging
import os
import re
from urllib.parse import unquote

from celery.result import AsyncResult
from django.conf import settings
from django.contrib.auth.decorators import login_required, permission_required
from django.db.models import Q
from django.forms import modelformset_factory as formset_factory
from django.http import HttpResponse, JsonResponse, HttpResponseRedirect
from django.shortcuts import render, get_object_or_404
from django.urls import reverse
from django.views.decorators.csrf import ensure_csrf_cookie
from django.views.decorators.http import require_http_methods
from lxml import etree
from rest_framework.request import Request

import api.views
from transcriptions import models, tasks
def get_login_status(request):
    """Return the requesting user's username, or None if they are not authenticated."""
    user = request.user
    return user.username if user.is_authenticated else None
def process_validation_errors(log):
    """Convert the entries of a validation error log into shorter messages.

    Each entry is converted with ``str()`` and then cleaned up by, in order:
    replacing the ``<string>:`` prefix with ``Error in line ``, removing the
    TEI namespace in Clark notation and removing the
    ``0:ERROR:SCHEMASV:SCHEMAV_ELEMENT_CONTENT:`` marker.

    Args:
        log: an iterable of error entries (anything ``str()`` accepts).

    Returns:
        A list with one cleaned message per entry, in the original order.
    """
    # applied in this order to every message
    substitutions = (
        ('<string>:', 'Error in line '),
        ('{http://www.tei-c.org/ns/1.0}', ''),
        ('0:ERROR:SCHEMASV:SCHEMAV_ELEMENT_CONTENT:', ''),
    )
    errors = []
    for entry in log:
        message = str(entry)
        for target, replacement in substitutions:
            message = message.replace(target, replacement)
        errors.append(message)
    return errors
def sort_by_sigla(item):
    """Sort key for objects with a ``siglum`` attribute.

    'basetext' maps to -1. A siglum that ``float()`` accepts maps to that
    float. A siglum of the form ``<digits>S`` or ``<digits>S<digit>`` maps to
    the number plus 0.1, plus the trailing digit divided by 100 when present.
    Any other siglum is returned unchanged.
    """
    raw = item.siglum
    if raw == 'basetext':
        return -1
    try:
        return float(raw)
    except ValueError:
        pass
    supplement = re.match(r'^(\d+)S(\d?)$', raw)
    if supplement is None:
        # not a recognised pattern: hand back the siglum itself
        return raw
    number, suffix = supplement.groups()
    key = float(number) + 0.1
    if suffix != '':
        key += float(suffix)/100
    return key
def home(request):
    """Render the transcription home page for visitors who are not logged in.

    Logged-in users are redirected to '/transcriptions/manage' instead.
    """
    post_login_url = request.path + '?' + request.GET.urlencode()
    login_details = get_login_status(request)
    if login_details is None:
        context = dict(
            login_status=login_details,
            post_login_url=post_login_url,
            post_logout_url='/transcriptions',
            page_title='Transcription Home Page',
        )
        return render(request, 'transcriptions/home.html', context)
    return HttpResponseRedirect('/transcriptions/manage')
@login_required
@permission_required(['transcriptions.delete_transcription',
                      'transcriptions.delete_collationunit',
                      'transcriptions.change_transcription',
                      'transcriptions.change_collationunit',
                      'transcriptions.add_transcription',
                      'transcriptions.add_collationunit',
                      ], raise_exception=True)
def manage(request):
    """List the requesting user's transcriptions or report on an indexing task.

    If the query string contains ``task``, the view returns a JSON object with
    the keys ``siglum`` (echoed from the query string), ``result``, ``state``
    and ``task_id`` taken from the Celery ``AsyncResult`` for that id.

    Otherwise it renders 'transcriptions/manage.html' with the transcriptions
    belonging to the requesting user, ordered with ``sort_by_sigla``.
    """
    if 'task' in request.GET:
        task = AsyncResult(request.GET.get('task'))
        outcome = task.result
        if isinstance(outcome, BaseException):
            # a failed task stores the raised exception as its result, which
            # cannot be serialised to JSON; report its message instead
            outcome = str(outcome)
        return JsonResponse({'siglum': request.GET.get('siglum'),
                             'result': outcome,
                             'state': task.state,
                             'task_id': task.task_id
                             })

    post_login_url = request.path + '?' + request.GET.urlencode()
    login_details = get_login_status(request)
    # sort them for the list display
    transcription_list = sorted(models.Transcription.objects.filter(user__id=request.user.id),
                                key=sort_by_sigla)
    data = {'login_status': login_details,
            'post_login_url': post_login_url,
            'post_logout_url': '/transcriptions',
            'page_title': 'Transcription Uploader',
            'transcriptions': transcription_list
            }
    return render(request, 'transcriptions/manage.html', data)
# validation function
def validate_xml(tree, filename, skip_schema=False):
    """Validate a parsed transcription and collect human-readable errors.

    Unless ``skip_schema`` is truthy the tree is first validated against
    'TEI-MUYA.xsd' in the project's 'transcriptions/schema' directory. The
    following project checks are then always applied:

    * every ``@hand`` used on a ``tei:rdg`` is declared as the ``@xml:id`` of
      a ``tei:listWit/tei:witness``;
    * ``//tei:title[@type="document"]/@n`` exists and is 'basetext' or digits,
      optionally followed by 'S', 'S1' or 'S2';
    * no ``tei:app`` is nested inside another ``tei:app``.

    Args:
        tree: the parsed document; must provide ``xpath(expr, namespaces=...)``.
        filename: echoed back in the result under 'filename'.
        skip_schema: when truthy the schema validation step is skipped.

    Returns:
        A dict with the keys 'valid' (bool), 'errors' (list of str; errors
        from the project checks are placed before the schema errors) and
        'filename'.
    """
    tei_ns = {'tei': 'http://www.tei-c.org/ns/1.0'}

    if skip_schema:
        schema_valid = True
        errors = ['The file has not been validated against the schema.']
    else:
        # first check with the schema unless instructed to skip
        schema_path = os.path.join(settings.BASE_DIR, 'transcriptions', 'schema', 'TEI-MUYA.xsd')
        schema = etree.XMLSchema(etree.parse(schema_path))
        schema_valid = schema.validate(tree) is not False
        errors = [] if schema_valid else process_validation_errors(schema.error_log)
    results = {'valid': schema_valid, 'errors': errors, 'filename': filename}

    # check that all hands are included in the header
    declared_hands = set(tree.xpath('//tei:listWit/tei:witness/@xml:id',
                                    namespaces={'tei': 'http://www.tei-c.org/ns/1.0',
                                                'xml': 'http://www.w3.org/XML/1998/namespace'}))
    used_hands = set(tree.xpath('//tei:rdg/@hand', namespaces=tei_ns))
    missing_hands = used_hands - declared_hands
    if missing_hands:
        results['valid'] = False
        # sorted so that the message is the same on every run
        results['errors'].insert(0, 'There are hands in this transcription which have not been declared in the '
                                    'header. The missing hands are: %s.' % ', '.join(sorted(missing_hands)))

    # is there a sigla and is it acceptable
    titles = tree.xpath('//tei:title[@type="document"]/@n', namespaces=tei_ns)
    if not titles:
        results['valid'] = False
        results['errors'].insert(0, 'No siglum provided in the transcription. '
                                    'The siglum should be at //tei:title[@type="document"]/@n')
    else:
        siglum = titles[0]
        # TODO: this is perhaps better as a regex but it needs to work in partnership with the filtering of
        # Supplements in the output so I am restricting here until I know how that will work.
        vsiglum = siglum
        if len(siglum) > 1:
            # strip one supplement marker, if present, before checking the numeric part
            for suffix in ('S1', 'S2', 'S'):
                if siglum.endswith(suffix):
                    vsiglum = siglum[:-len(suffix)]
                    break
        if vsiglum != 'basetext' and not vsiglum.isdigit():
            results['valid'] = False
            results['errors'].insert(0, 'The siglum provided in the transcription (%s) does not comply with the '
                                        'project conventions. It should be "basetext" or a numerical identifier '
                                        '(possibly followed by S, S1 or S2).' % siglum)

    # embedded app tags (it happens and is valid TEI but makes no sense in the context of these transcriptions)
    if len(tree.xpath('//tei:app//tei:app', namespaces=tei_ns)) > 0:
        results['valid'] = False
        results['errors'].insert(0, 'The transcription contains an app tag embedded in another app tag. '
                                    'This cannot be indexed for collation and should be fixed.')
    return results
@require_http_methods(["POST"])
def validate(request):
    """Validate an uploaded transcription and return the result as JSON.

    The XML is read from the POST field ``src`` (a base64 data URL) or, when
    that is absent, from the URL-quoted POST field ``xml``. The parsed
    document is passed to ``validate_xml`` together with the ``file_name``
    and ``skip_schema`` POST fields.

    Returns:
        A JSON response with the ``validate_xml`` result, a 415 response if
        the XML is not well formed, or a 400 response if neither ``src`` nor
        ``xml`` was posted.

    Raises:
        ValueError: if ``src`` is present but is not a base64 data URL.
    """
    filename = request.POST.get('file_name', None)
    # NOTE(review): POST values are strings, so any non-empty value (including
    # 'false') is truthy for validate_xml - confirm what the client sends.
    skip_schema = request.POST.get('skip_schema', False)
    base64file = request.POST.get('src', None)
    if base64file is not None:
        meta, content = base64file.split(',', 1)
        if not re.match("data:.*?/(.*?);base64", meta):
            raise ValueError("Can't parse base64 file data ({})".format(meta))
        real_content = base64.b64decode(content)
    else:
        quoted_xml = request.POST.get('xml', None)
        if quoted_xml is None:
            return HttpResponse('no transcription data was supplied', status=400)
        # lxml refuses to parse str input that carries an encoding
        # declaration, so the declaration is removed from text input
        real_content = re.sub(r'<\?xml.+?\?>', '', unquote(quoted_xml))
    try:
        tree = etree.fromstring(real_content)
    except etree.XMLSyntaxError:
        return HttpResponse('the file was not well formed xml', status=415)
    # now validate against our schema
    results = validate_xml(tree, filename, skip_schema)
    return JsonResponse(results)
@ensure_csrf_cookie
@require_http_methods(["POST"])
@permission_required(['transcriptions.delete_transcription',
                      'transcriptions.delete_collationunit',
                      'transcriptions.change_transcription',
                      'transcriptions.change_collationunit',
                      'transcriptions.add_transcription',
                      'transcriptions.add_collationunit',
                      ], raise_exception=True)
def index(request):
    """Validate an uploaded transcription and queue it for indexing.

    The XML is read from the POST field ``src`` (a base64 data URL) or, when
    that is absent, from the URL-quoted POST field ``xml``. Every non-empty
    POST field whose name starts with 'language' is collected and passed to
    the indexing task. If the document validates, the
    ``tasks.index_transcription`` task is started and the client is
    redirected to the manage page with the task id and siglum in the query
    string.

    Returns:
        A redirect to '/transcriptions/manage?task=...&siglum=...', a 415
        response if the XML is not well formed or does not validate, or a 400
        response if neither ``src`` nor ``xml`` was posted.

    Raises:
        ValueError: if ``src`` is present but is not a base64 data URL.
    """
    filename = request.POST.get('file_name', None)
    base64file = request.POST.get('src', None)
    # NOTE(review): POST values are strings, so any non-empty value (including
    # 'false') is truthy for validate_xml - confirm what the client sends.
    skip_schema = request.POST.get('skip_schema', False)
    languages = [request.POST.get(key) for key in request.POST
                 if key.startswith('language') and request.POST.get(key) != '']

    if base64file is not None:
        meta, content = base64file.split(',', 1)
        if not re.match("data:.*?/(.*?);base64", meta):
            raise ValueError("Can't parse base64 file data ({})".format(meta))
        xml_source = base64.b64decode(content)
    else:
        quoted_xml = request.POST.get('xml', None)
        if quoted_xml is None:
            return HttpResponse('no transcription data was supplied', status=400)
        # lxml refuses to parse str input that carries an encoding
        # declaration, so the declaration is removed from text input
        xml_source = re.sub(r'<\?xml.+?\?>', '', unquote(quoted_xml))
    try:
        tree = etree.fromstring(xml_source)
    except etree.XMLSyntaxError:
        return HttpResponse('the file was not well formed xml', status=415)

    # now validate against our schema
    results = validate_xml(tree, filename, skip_schema)
    if results['valid'] is False:
        return HttpResponse('the file did not validate (use the validate option for more detail)', status=415)

    # else we have a valid XML file so we can continue to indexing
    collection = request.POST.get('collection', 'unknown')
    username = request.user.id
    siglum = tree.xpath('//tei:title[@type="document"]/@n', namespaces={'tei': 'http://www.tei-c.org/ns/1.0'})[0]
    public_flag = siglum == 'basetext'

    # remove the encoding declaration because etree parser does not support it
    # (to do that it needs to be turned from bytes to string)
    if isinstance(xml_source, bytes):
        xml_source = xml_source.decode('utf-8')
    xml_string = re.sub(r'<\?xml.+?\?>', '', xml_source)

    # now we are allowed to start the indexing
    task = tasks.index_transcription.delay(xml_string,
                                           collection,
                                           siglum=siglum,
                                           username=username,
                                           public_flag=public_flag,
                                           languages=languages)
    return HttpResponseRedirect('/transcriptions/manage?task=' + task.task_id + '&siglum=' + siglum)
@login_required
@require_http_methods(["GET"])
def schema_download(request):
    """Return the project's 'TEI-MUYA.rng' schema file as an attachment."""
    schema_path = os.path.join(settings.BASE_DIR, 'transcriptions', 'schema', 'TEI-MUYA.rng')
    # read the raw bytes so the file is served as stored, without decoding it
    # with the platform's default encoding, and is closed straight away
    with open(schema_path, mode='rb') as schema_file:
        response = HttpResponse(schema_file.read(), content_type='text/rng+xml')
    response['Content-Disposition'] = 'attachment; filename=TEI-MUYA.rng'
    return response
@login_required
@ensure_csrf_cookie
@require_http_methods(["POST"])
@permission_required(['transcriptions.delete_transcription',
                      'transcriptions.delete_collationunit'
                      ], raise_exception=True)
def delete(request):
    """Delete the transcription named in the POST data and return to the manage page.

    The ``delete-transcription`` POST field is split on '|' and its second
    part is used as the id of the transcription to delete. Deletion is best
    effort: a failure is logged and the client is redirected to the 'manage'
    URL regardless.
    """
    submitted = request.POST.get('delete-transcription', None)
    if submitted is not None:
        try:
            # NOTE(review): the lookup is by id only, whereas manage() lists
            # only the requesting user's transcriptions - confirm whether
            # deletion should be restricted to the owner as well.
            record_id = submitted.split('|')[1]
            models.Transcription.objects.filter(id=record_id).delete()
        except Exception:
            # keep the redirect, but leave a trace instead of hiding the failure
            logging.getLogger(__name__).exception('Could not delete transcription %r', submitted)
    return HttpResponseRedirect(reverse('manage'))
@login_required
def collation_units(request):
    """Render the collation units the requesting user has for one siglum.

    The siglum comes from the ``siglum`` query parameter. Units are ordered
    by language, chapter number, stanza number and line number.
    """
    post_login_url = request.path + '?' + request.GET.urlencode()
    login_details = get_login_status(request)
    siglum = request.GET.get('siglum', None)

    ordering = ('language', 'chapter_number', 'stanza_number', 'line_number')
    owned = models.CollationUnit.objects.all().filter(siglum=siglum, user=request.user)
    units = owned.distinct().order_by(*ordering)

    return render(request, 'transcriptions/collation_units.html', {
        'login_status': login_details,
        'post_login_url': post_login_url,
        'post_logout_url': '/transcriptions',
        'page_title': 'Collation units for {}'.format(siglum),
        'data': units
    })