Skip to content

Commit

Permalink
Merge pull request #24 from bytinbit/refactor/switch-to-pikepdf
Browse files Browse the repository at this point in the history
Switch from PyPDF2 to pikepdf
  • Loading branch information
bytinbit authored Jan 11, 2021
2 parents a8b1378 + 8cada69 commit c5170f9
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 64 deletions.
26 changes: 13 additions & 13 deletions nobubo/calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from dataclasses import dataclass
from typing import List

import PyPDF2
import pikepdf

from nobubo import core

Expand All @@ -41,23 +41,21 @@ class Factor:
def parse_cli_input(input_layout: (int, int, int), output_layout_cli: str, print_margin: int,
reverse_assembly: bool, input_path: str, output_path: str
) -> (core.InputProperties, core.OutputProperties):
with open(pathlib.Path(input_path), "rb") as inputfile:
reader = PyPDF2.PdfFileReader(inputfile, strict=False)

width, height = calculate_page_dimensions(
reader.getPage(1)) # first page (getPage(0)) may contain overview
with pikepdf.open(pathlib.Path(input_path)) as inputfile:
# the first page (pages[0]) may contain an overview, so use the second one (pages[1])
width, height = calculate_page_dimensions(inputfile.pages[1])
input_properties = core.InputProperties(
input_filepath=pathlib.Path(input_path),
output_path=pathlib.Path(output_path),
number_of_pages=reader.getNumPages(),
number_of_pages=len(inputfile.pages),
pagesize=core.PageSize(width=width, height=height),
layout=parse_input_layouts(input_layout),
reverse_assembly=reverse_assembly)

output_properties = core.OutputProperties(output_path=pathlib.Path(output_path),
output_layout=parse_output_layout(output_layout_cli, print_margin),
)
return input_properties, output_properties
return input_properties, output_properties


def parse_input_layouts(input_layout: (int, int, int)) ->[core.Layout]:
Expand All @@ -83,15 +81,17 @@ def calculate_pages_needed(layout: core.Layout, n_up_factor: Factor) -> int:
return math.ceil(layout.columns/n_up_factor.x) * math.ceil(layout.rows/n_up_factor.y)


def calculate_page_dimensions(page: PyPDF2.pdf.PageObject) -> (float, float):
def calculate_page_dimensions(page: pikepdf.Page) -> (float, float):
"""
Calculates the width and height of a page (the x, y value for the offset) in default user space units as defined in the pdf standard.
Uses the CropBox value, since this is the area visible to the printer; falls back to the MediaBox if the page has no CropBox.
:param page: A pattern page.
:param page: A PDF page.
:return: tuple with x, y value (width, height), each rounded to two decimals.
"""
return round(float(page.cropBox[2])-float(page.cropBox[0]), 2), \
round(float(page.cropBox[3])-float(page.cropBox[1]), 2)
if not hasattr(page, "CropBox"):
box = page.MediaBox
else:
box = page.CropBox
return round(float(box[2])-float(box[0]), 2), round(float(box[3])-float(box[1]), 2)


def convert_to_userspaceunits(width_height: [int, int]) -> core.PageSize:
Expand Down
46 changes: 22 additions & 24 deletions nobubo/disassembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from copy import copy
import pathlib

import PyPDF2
from pikepdf import Pdf, Page

from nobubo import core, calc, output

Expand All @@ -31,25 +31,23 @@ def create_output_files(temp_collage_paths: [pathlib.Path],
input_properties: core.InputProperties,
output_properties: core.OutputProperties):
for counter, collage_path in enumerate(temp_collage_paths):
with collage_path.open("rb") as collagefile:
reader = PyPDF2.PdfFileReader(collagefile, strict=False)
collage = reader.getPage(0)
new_outputpath = calc.generate_new_outputpath(output_properties.output_path, counter)
print(f"\nChopping up the collage...")
chopped_up_files = _create_output_files(collage, input_properties.pagesize,
input_properties.layout[counter], output_properties.output_layout)
print(f"Successfully chopped up the collage.\n")
output.write_chops(chopped_up_files, new_outputpath)
print(f"Final pdf written to {new_outputpath}. Enjoy your sewing :)")


def _create_output_files(assembled_collage: PyPDF2.pdf.PageObject,
collage = Pdf.open(collage_path)
new_outputpath = calc.generate_new_outputpath(output_properties.output_path, counter)
print(f"\nChopping up the collage...")
chopped_up_files = _create_output_files(collage, input_properties.pagesize,
input_properties.layout[counter], output_properties.output_layout)
print(f"Successfully chopped up the collage.\n")
output.write_chops(chopped_up_files, new_outputpath)
print(f"Final pdf written to {new_outputpath}. Enjoy your sewing :)")


def _create_output_files(collage: Pdf,
pagesize: core.PageSize,
current_layout: core.Layout,
output_layout: [int]) -> PyPDF2.PdfFileWriter:
output_layout: [int]) -> Pdf:
"""
Chops up the collage that consists of all the pattern pages to individual pages of the desired output size.
:param assembled_collage: One pdf page that contains all assembled pattern pages.
:param collage: One pdf page that contains all assembled pattern pages.
:param pagesize: Page size taken from the input properties of the pdf.
:param current_layout: The layout of the collage that is being chopped up.
:param output_layout: The desired output layout.
:return: The pdf with several pages, ready to write to disk.
Expand All @@ -59,10 +57,11 @@ def _create_output_files(assembled_collage: PyPDF2.pdf.PageObject,
lowerleft_factor = calc.Factor(x=0, y=0)
upperright_factor = calc.Factor(x=1, y=1)

writer = PyPDF2.PdfFileWriter()
for x in range(0, calc.calculate_pages_needed(current_layout, n_up_factor)):
page = copy(assembled_collage)
# cf. https://stackoverflow.com/questions/52315259/pypdf2-cant-add-multiple-cropped-pages#
output = Pdf.new()
output.copy_foreign(collage.Root) # TODO must Root be updated if new pages are added?
# Root must be copied too, not only the page: thanks to https://github.com/cfcurtis/sewingutils for this!
for i in range(0, calc.calculate_pages_needed(current_layout, n_up_factor)):
page = output.copy_foreign(collage.pages[0])

lowerleft: core.Point = _calculate_lowerleft_point(lowerleft_factor, n_up_factor, pagesize)
upperright: core.Point = _calculate_upperright_point(upperright_factor, n_up_factor, current_layout, pagesize)
Expand All @@ -71,11 +70,10 @@ def _create_output_files(assembled_collage: PyPDF2.pdf.PageObject,
colsleft = _calculate_colsrows_left(current_layout.columns, upperright_factor.x, n_up_factor.x)
lowerleft_factor, upperright_factor = _adjust_factors(lowerleft_factor, upperright_factor, colsleft)

page.cropBox.lowerLeft = (lowerleft.x, lowerleft.y)
page.cropBox.upperRight = (upperright.x, upperright.y)
writer.addPage(page)
page.CropBox = [lowerleft.x, lowerleft.y, upperright.x, upperright.y]
output.pages.append(page)

return writer
return output


def _calculate_colsrows_left(layout_element: int, factor: int, nup_factor: int) -> int:
Expand Down
14 changes: 5 additions & 9 deletions nobubo/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,25 @@
#
# You should have received a copy of the GNU Affero General Public License
# along with Nobubo. If not, see <https://www.gnu.org/licenses/>.
import PyPDF2
from pikepdf import Pdf
import pathlib
import sys

from nobubo import core, calc


def write_chops(pypdf2_writer: PyPDF2.PdfFileWriter, output_path: pathlib.Path):
def write_chops(collage: Pdf, output_path: pathlib.Path):
print("Writing file...")
try:
with open(output_path, "wb") as output:
pypdf2_writer.write(output)
collage.save(output_path)
except OSError as e:
print(f"While writing the file, this error occurred:\n{e}")
sys.exit(1)


def write_collage(temp_collage_paths: [pathlib.Path], output_properties: core.OutputProperties):
for counter, collage_path in enumerate(temp_collage_paths):
writer = PyPDF2.PdfFileWriter()
new_outputpath = calc.generate_new_outputpath(output_properties.output_path, counter)
with collage_path.open("rb") as collagefile:
reader = PyPDF2.PdfFileReader(collagefile, strict=False)
writer.addPage(reader.getPage(0))
write_chops(writer, new_outputpath)
temp_collage = Pdf.open(collage_path)
temp_collage.save(new_outputpath)
print(f"Collage written to {new_outputpath}. Enjoy your sewing :)")
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
click >= 7.1.2
PyPDF2 >= 1.26.0
pikepdf >= 1.19.3

4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name="nobubo-bytinbit",
version="1.1.0",
version="1.2.0",
description="Nobubo assembles a digital pdf sewing pattern and chops it into a desired output size to be printed.",
long_description=long_description,
long_description_content_type="text/markdown",
Expand All @@ -17,7 +17,7 @@
"console_scripts": ["nobubo = nobubo.nobubo:main"]
},
python_requires=">=3.7",
install_requires=["click", "PyPDF2"],
install_requires=["click", "pikepdf"],
classifiers=[
"Topic :: Printing",
"Topic :: Utilities",
Expand Down
25 changes: 13 additions & 12 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pathlib
import PyPDF2
# import PyPDF2
import pikepdf
import pytest

import textract
Expand All @@ -15,22 +16,27 @@ def __init__(self, outputdir: pathlib.Path) -> None:

def read(self):
for filepath in self.outputdir.glob("*.pdf"):
file = open(filepath, "rb")
file = pikepdf.open(filepath)
self._files.append(file)
self.readers[filepath.name] = PyPDF2.PdfFileReader(file)
self.readers[filepath.name] = file
return sorted(self.readers.keys())

def pagesize(self, filename: str, pagenumber: int=0) -> [float, float]:
reader = self.readers[filename]
page = reader.getPage(pagenumber)
return [round(float(page.cropBox[2])-float(page.cropBox[0]), 2), round(float(page.cropBox[3])-float(page.cropBox[1]), 2)]
page = reader.pages[pagenumber]
if not hasattr(page, "CropBox"):
box = page.MediaBox
else:
box = page.CropBox
return [round(float(box[2])-float(box[0]), 2),
round(float(box[3])-float(box[1]), 2)]

def pagecount(self, filename: str) -> int:
reader = self.readers[filename]
return reader.getNumPages()
return len(reader.pages)

# TODO is there a better way to check the order of the pages?
def pages_order(self, filepath: str, pageamount: int=1) -> [str, str]:
def pages_order(self, filepath: str) -> [str, str]:
text = str(textract.process(filepath, encoding="utf-8"), "utf-8").split("\n\n")
# textract emits the ASCII form feed character '\f' (\x0c), which must be removed
res = list(filter(lambda a: a not in '\x0c', text))
Expand Down Expand Up @@ -80,11 +86,6 @@ def two_overviews() -> [core.Layout, core.Layout]:
return [first, second]


@pytest.fixture()
def one_pdf_page_same_boxes() -> PyPDF2.pdf.PageObject:
return PyPDF2.pdf.PageObject.createBlankPage(None, 483.307, 729.917)


@pytest.fixture()
def n_up_factor_a0() -> calc.Factor:
return calc.Factor(x=4, y=4)
Expand Down
3 changes: 0 additions & 3 deletions tests/test_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@ def test_calculate_pages_needed_oneoverview_a0_unevenlayout(self, one_overview_u
def test_calculate_pages_needed_oneoverview_custom_unevenlayout(self, one_overview_uneven, nup_factor_custom):
assert calc.calculate_pages_needed(one_overview_uneven, nup_factor_custom) == 2

def test_calculate_offset(self, one_pdf_page_same_boxes):
assert calc.calculate_page_dimensions(one_pdf_page_same_boxes) == (483.31, 729.92)

def test_userspaceunits_conversion_a0(self):
paper = calc.convert_to_userspaceunits([841, 1189])
assert paper.width == 2383.937
Expand Down

0 comments on commit c5170f9

Please sign in to comment.