Skip to content

Commit

Permalink
re-order the units again (back to longest first)
Browse files Browse the repository at this point in the history
  • Loading branch information
catsmith committed Mar 23, 2024
1 parent 6d75213 commit 59c3e0e
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 19 deletions.
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1452,8 +1452,6 @@ Catena Dev branch changes

* Exported XML apparatus uses the n attribute for the identifier of ```<ab>``` elements rather than xml:id. The value of the attribute remains unchanged.

* In exporter.py the order of overlapped units in relation to top line units has changed. The shortest unit starting at each index point now comes first (previously the longest unit came first).

* In exporter.py ```get_lemma_text()``` now takes start and end arguments as strings. This is important for dealing with joined units in inheriting exporters.

* In exporter.py ```get_text()```, when om and lac are returned, their string value always includes the full stop, e.g. ```om.```
Expand Down
13 changes: 7 additions & 6 deletions exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,11 @@ def get_app_units(self, apparatus, overtext, context, missing):
app_list.append(app)
return app_list

def get_overtext_data(self, structure):
return {'current': structure['overtext'][0]}
def get_overtext_data(self, entry):
return {'current': entry['structure']['overtext'][0]}

def sort_units(self, unit):
return (unit['start'], -unit['end'])

def get_unit_xml(self, entry):
"""Function to turn the JSON apparatus of the collation unit into TEI XML.
Expand Down Expand Up @@ -428,10 +431,8 @@ def get_unit_xml(self, entry):
# if we are ignoring the basetext add it to our missing list so it isn't listed (except in lemma)
if self.ignore_basetext:
missing.append(basetext_siglum)
# this sort will change the order of the overlap units so shortest starting at each index point comes first
apparatus = sorted(apparatus, key=lambda d: (d['start'], d['end']))

overtext = self.get_overtext_data(entry['structure'])
apparatus = sorted(apparatus, key=self.sort_units)
overtext = self.get_overtext_data(entry)

app_units = self.get_app_units(apparatus, overtext, context, missing)
for app in app_units:
Expand Down
22 changes: 11 additions & 11 deletions tests_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,40 +729,40 @@ def test_get_lemma_text_defaults(self):

# test a few ranges where there is data
expected_text = ['ἀποκαλύψαι τὸν υἱὸν']
generated_text = exp.get_lemma_text(overtext, 2, 6)
generated_text = exp.get_lemma_text(overtext, '2', '6')
self.assertEqual(expected_text, generated_text)

expected_text = ['ἐν']
generated_text = exp.get_lemma_text(overtext, 10, 10)
generated_text = exp.get_lemma_text(overtext, '10', '10')
self.assertEqual(expected_text, generated_text)

expected_text = ['ἀποκαλύψαι']
generated_text = exp.get_lemma_text(overtext, 1, 3)
generated_text = exp.get_lemma_text(overtext, '1', '3')
self.assertEqual(expected_text, generated_text)

expected_text = ['ἔθνεσιν']
generated_text = exp.get_lemma_text(overtext, 24, 24)
generated_text = exp.get_lemma_text(overtext, '24', '24')
self.assertEqual(expected_text, generated_text)

# test where there is no data
expected_text = ['', 'om']
generated_text = exp.get_lemma_text(overtext, 27, 27)
generated_text = exp.get_lemma_text(overtext, '27', '27')
self.assertEqual(expected_text, generated_text)

expected_text = ['', 'om']
generated_text = exp.get_lemma_text(overtext, 1, 1)
generated_text = exp.get_lemma_text(overtext, '1', '1')
self.assertEqual(expected_text, generated_text)

def test_get_lemma_text_with_punctuation(self):
exp = Exporter(include_punctuation=True)
overtext = {'current': self.OVERTEXT[0]}

expected_text = ['ἔθνεσιν,']
generated_text = exp.get_lemma_text(overtext, 24, 24)
generated_text = exp.get_lemma_text(overtext, '24', '24')
self.assertEqual(expected_text, generated_text)

expected_text = ['(ἐν)']
generated_text = exp.get_lemma_text(overtext, 10, 10)
generated_text = exp.get_lemma_text(overtext, '10', '10')
self.assertEqual(expected_text, generated_text)

def test_get_witnesses(self):
Expand Down Expand Up @@ -1374,8 +1374,8 @@ def test_get_unit_xml_unit_sorting(self, mocked_get_app_units):
{'start': 2, 'end': 4},
{'start': 9, 'end': 9},
{'start': 2, 'end': 8}]
expected_ordered_units = [{'start': 2, 'end': 4},
{'start': 2, 'end': 8},
expected_ordered_units = [{'start': 2, 'end': 8},
{'start': 2, 'end': 4},
{'start': 6, 'end': 8},
{'start': 9, 'end': 9}]
original_app = {'context': 'Gal.1.1',
Expand Down Expand Up @@ -1403,7 +1403,7 @@ def test_get_unit_apparatus_line_sorting(self, mocked_get_app_units):
'om_readings': []}
}
exp = Exporter()
expected_app_order = [{'start': 2, 'end': 4}, {'start': 2, 'end': 8}, {'start': 9, 'end': 9}]
expected_app_order = [{'start': 2, 'end': 8}, {'start': 2, 'end': 4}, {'start': 9, 'end': 9}]
exp.get_unit_xml(app)
mocked_get_app_units.assert_called_with(expected_app_order, {'current': {'id': 'basetext'}}, 'Gal.1.1', [])

Expand Down

0 comments on commit 59c3e0e

Please sign in to comment.