Skip to content
This repository has been archived by the owner on Apr 15, 2024. It is now read-only.

Commit

Permalink
Added: a simpler ordering mode when 1<F.
Browse files Browse the repository at this point in the history
  • Loading branch information
euske committed Sep 26, 2016
1 parent 44977b6 commit 8150458
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 3 deletions.
3 changes: 2 additions & 1 deletion docs/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

<div align=right class=lastmod>
<!-- hhmts start -->
Last Modified: Wed Jun 25 10:27:52 UTC 2014
Last Modified: Mon Sep 26 09:04:15 UTC 2016
<!-- hhmts end -->
</div>

Expand Down Expand Up @@ -268,6 +268,7 @@ <h4>Options</h4>
<dd> Specifies how much the horizontal and vertical positions of a text matter
when determining the text order. The value should be within the range of
-1.0 (only the horizontal position matters) to +1.0 (only the vertical position matters).
When this value is outside that range (e.g. +2), a simpler ordering rule is used.
The default value is 0.5.
<p>
<dt> <code>-C</code>
Expand Down
11 changes: 9 additions & 2 deletions pdfminer/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,13 +676,20 @@ def analyze(self, laparams):
for obj in empties:
obj.analyze(laparams)
textboxes = list(self.group_textlines(laparams, textlines))
if textboxes:
if -1 <= laparams.boxes_flow and laparams.boxes_flow <= +1 and textboxes:
self.groups = self.group_textboxes(laparams, textboxes)
assigner = IndexAssigner()
for group in self.groups:
group.analyze(laparams)
assigner.run(group)
textboxes.sort(key=lambda box: box.index)
else:
def getkey(box):
if isinstance(box, LTTextBoxVertical):
return (0, -box.x1, box.y0)
else:
return (1, box.y0, box.x0)
textboxes.sort(key=getkey)
self._objs = textboxes + otherobjs + empties
return

Expand Down Expand Up @@ -725,4 +732,4 @@ def __init__(self, pageid, bbox, rotate=0):
def __repr__(self):
return ('<%s(%r) %s rotate=%r>' %
(self.__class__.__name__, self.pageid,
bbox2str(self.bbox), self.rotate))
bbox2str(self.bbox), self.rotate))

0 comments on commit 8150458

Please sign in to comment.