Skip to content

Commit

Permalink
fix(text): Don't break URLs in annotation (#1104)
Browse files Browse the repository at this point in the history
* refactor(text): Define MAX_WIDTH

* refactor(text): Define SPLITTER_WIDTH

* fix(text): Don't break URLs in annotation

Fixes #1101

* test: Add unit test for annotations

Add unit tests for annotations in xml2rfc.TextWriter.render_reference
method.
  • Loading branch information
kesara authored Feb 21, 2024
1 parent 20a5b30 commit 4278f7b
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 10 deletions.
62 changes: 62 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from xml2rfc.boilerplate_rfc_7841 import boilerplate_rfc_status_of_memo
from xml2rfc.walkpdf import xmldoc
from xml2rfc.writers.base import default_options
from xml2rfc.writers.text import MAX_WIDTH

try:
from xml2rfc import debug
Expand Down Expand Up @@ -635,6 +636,67 @@ def test_boilerplate_insert_status_of_memo_editorial(self):
target = 'https://www.rfc-editor.org/info/rfc9280'
self.assertEqual(target, rfc.xpath('./section/t/eref')[0].get('target'))

class TextWriterTest(unittest.TestCase):
'''TextWriter tests'''

def setUp(self):
xml2rfc.log.quiet = True
path = 'tests/input/elements.xml'
self.parser = xml2rfc.XmlRfcParser(path,
quiet=True,
options=default_options,
**options_for_xmlrfcparser)
self.xmlrfc = self.parser.parse()
self.writer = xml2rfc.TextWriter(self.xmlrfc, quiet=True)

def test_render_reference(self):
# test annotations
reference = '''
<references>
<reference anchor="REFTEST">
<front>
<title>Reference Test</title>
<author initials="J." surname="Doe" fullname="Jane Doe">
<organization>ACMI Corp.</organization>
</author>
<date month="February" year="2024"/>
</front>
<annotation>{annotation}</annotation>
</reference>
</references>'''
self.writer.refname_mapping['REFTEST'] = 'REFTEST'

# single line annotation
annotation = 'foobar'
references = lxml.etree.fromstring(reference.format(annotation=annotation))
lines = self.writer.render_reference(references.getchildren()[0], width=60)
self.assertEqual(len(lines), 1)
self.assertIn(annotation, lines[0].text)

# multi line annotation
annotation = '''Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa.
Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim.'''
references = lxml.etree.fromstring(reference.format(annotation=annotation))
lines = self.writer.render_reference(references.getchildren()[0], width=60)
self.assertGreater(len(lines), 1)
self.assertIn(annotation[:5], lines[0].text)

# single line annotation (larger than width and smaller than MAX_WIDTH)
annotation = 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commo'
references = lxml.etree.fromstring(reference.format(annotation=annotation))
lines = self.writer.render_reference(references.getchildren()[0], width=len(annotation)-5)
self.assertGreater(MAX_WIDTH, len(annotation))
self.assertGreater(len(lines), 1)
self.assertIn(annotation[:5], lines[0].text)

# annotation with URL
url = 'https://example.org'
annotation = f'Example: <eref target="{url}" />'
references = lxml.etree.fromstring(reference.format(annotation=annotation))
lines = self.writer.render_reference(references.getchildren()[0], width=60)
self.assertEqual(len(lines), 2)
self.assertIn(url, lines[1].text)


if __name__ == '__main__':
unittest.main()
24 changes: 14 additions & 10 deletions xml2rfc/writers/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@
from xml2rfc.utils import justify_inline, clean_text


MAX_WIDTH = 72
SPLITTER_WIDTH = 67

IndexItem = namedtuple('indexitem', ['item', 'subitem', 'anchor', 'page', ])
Joiner = namedtuple('joiner', ['join', 'indent', 'hang', 'overlap', 'do_outdent'])
# Joiner parts:
Expand Down Expand Up @@ -80,8 +83,8 @@ def __init__(self, elem, prev, next=None, beg=None, end=None):
self.beg = beg # beginning line of block
self.end = end # ending line of block

wrapper = utils.TextWrapper(width=72)
splitter = utils.TextSplitter(width=67)
wrapper = utils.TextWrapper(width=MAX_WIDTH)
splitter = utils.TextSplitter(width=SPLITTER_WIDTH)
seen = set()

# This is not a complete list of whitespace characters, and isn't intended to be. It's
Expand Down Expand Up @@ -299,7 +302,7 @@ def process(self):
joiners = base_joiners
if self.options.pagination:
self.add_pageno_placeholders()
lines = self.render(self.root, width=72, joiners=joiners)
lines = self.render(self.root, width=MAX_WIDTH, joiners=joiners)

if self.options.pagination:
lines = findblocks(lines)
Expand All @@ -321,8 +324,8 @@ def process(self):
sys.stderr.write(("%3d %10s [%4s] %s\n" % (i, tag, page, l.text)))
for i, l in enumerate(lines):
length = len(l.text)
if length > 72:
self.warn(l.elem, "Too long line found (L%s), %s characters longer than 72 characters: \n%s" %(i+1, length-72, l.text))
if length > MAX_WIDTH:
self.warn(l.elem, f"Too long line found (L{i + 1}), {length - MAX_WIDTH} characters longer than {MAX_WIDTH} characters: \n{l.text}")

text = ('\n'.join( l.text for l in lines )).rstrip(stripspace) + '\n'

Expand Down Expand Up @@ -505,7 +508,7 @@ def update_toc(self, lines):
else:
pass
# new toc, to be used to replace the old one
toclines = self.render(toc, width=72, joiners=base_joiners)
toclines = self.render(toc, width=MAX_WIDTH, joiners=base_joiners)
if toc_start and toc_end:
j = 2
for i in range(toc_start+2, toc_end):
Expand Down Expand Up @@ -1811,15 +1814,15 @@ def join_cols(left, right):
textwidth_l = textwidth(l)
textwidth_r = textwidth(r)
#assert textwidth_l+textwidth_r< 70
w = 72-textwidth_l-textwidth_r
w = MAX_WIDTH-textwidth_l-textwidth_r
lines.append(l+' '*w+r)
return '\n'.join(lines).rstrip(stripspace)+'\n'
#
def wrap(label, items, left, right, suffix=''):
line = '%s%s%s' % (label, items, suffix)
ll = len(left)
lr = len(right)
width = 48 if ll >= lr else min(48, 72-4-len(right[ll]))
width = 48 if ll >= lr else min(48, MAX_WIDTH-4-len(right[ll]))
wrapper = textwrap.TextWrapper(width=width, subsequent_indent=' '*len(label))
return wrapper.wrap(line)
#
Expand Down Expand Up @@ -2746,7 +2749,8 @@ def render_reference(self, e, width, **kwargs):
text += '.'
for ctag in ('annotation', ):
for c in e.iterdescendants(ctag):
text = self.tjoin(text, c, width, **kwargs)
# use MAX_WIDTH here since text gets formatted later
text = self.tjoin(text, c, MAX_WIDTH, keep_url=True, **kwargs)
text = fill(text, width=width, fix_sentence_endings=False, keep_url=True, **kwargs).lstrip(stripspace)

text = indent(text, 11, 0)
Expand Down Expand Up @@ -3957,7 +3961,7 @@ def find_minwidths(e, cells, hyphen_split=False):
Find the minimum column widths of regular cells
"""
i = 0
splitter = utils.TextSplitter(width=67, hyphen_split=hyphen_split)
splitter = utils.TextSplitter(width=SPLITTER_WIDTH, hyphen_split=hyphen_split)
for p in e.iterchildren(['thead', 'tbody', 'tfoot']):
for r in list(p.iterchildren('tr')):
j = 0
Expand Down

0 comments on commit 4278f7b

Please sign in to comment.