From 0993cc8ef0cf413658a1efaf6dd97c2ad17e9fa1 Mon Sep 17 00:00:00 2001 From: bytinbit Date: Mon, 4 Jan 2021 18:09:47 +0100 Subject: [PATCH 1/6] add first draft with pikepdf --- nobubo/disassembly.py | 45 ++++++++++++++++++++----------------------- nobubo/output.py | 13 +++++-------- 2 files changed, 26 insertions(+), 32 deletions(-) diff --git a/nobubo/disassembly.py b/nobubo/disassembly.py index c9a50aa..6490056 100644 --- a/nobubo/disassembly.py +++ b/nobubo/disassembly.py @@ -22,7 +22,7 @@ from copy import copy import pathlib -import PyPDF2 +from pikepdf import Pdf, Page from nobubo import core, calc, output @@ -31,25 +31,23 @@ def create_output_files(temp_collage_paths: [pathlib.Path], input_properties: core.InputProperties, output_properties: core.OutputProperties): for counter, collage_path in enumerate(temp_collage_paths): - with collage_path.open("rb") as collagefile: - reader = PyPDF2.PdfFileReader(collagefile, strict=False) - collage = reader.getPage(0) - new_outputpath = calc.generate_new_outputpath(output_properties.output_path, counter) - print(f"\nChopping up the collage...") - chopped_up_files = _create_output_files(collage, input_properties.pagesize, - input_properties.layout[counter], output_properties.output_layout) - print(f"Successfully chopped up the collage.\n") - output.write_chops(chopped_up_files, new_outputpath) - print(f"Final pdf written to {new_outputpath}. Enjoy your sewing :)") - - -def _create_output_files(assembled_collage: PyPDF2.pdf.PageObject, + collage = Pdf.open(collage_path) + new_outputpath = calc.generate_new_outputpath(output_properties.output_path, counter) + print(f"\nChopping up the collage...") + chopped_up_files = _create_output_files(collage, input_properties.pagesize, + input_properties.layout[counter], output_properties.output_layout) + print(f"Successfully chopped up the collage.\n") + output.write_chops(chopped_up_files, new_outputpath) + print(f"Final pdf written to {new_outputpath}. Enjoy your sewing :)") + + +def _create_output_files(collage: Pdf, pagesize: core.PageSize, current_layout: core.Layout, - output_layout: [int]) -> PyPDF2.PdfFileWriter: + output_layout: [int]) -> Pdf: """ Chops up the collage that consists of all the pattern pages to individual pages of the desired output size. - :param assembled_collage: One pdf page that contains all assembled pattern pages. + :param collage: One pdf page that contains all assembled pattern pages. :param input_properties: Properties of the pdf. :param output_layout: The desired output layout. :return: The pdf with several pages, ready to write to disk. @@ -59,10 +57,10 @@ def _create_output_files(assembled_collage: PyPDF2.pdf.PageObject, lowerleft_factor = calc.Factor(x=0, y=0) upperright_factor = calc.Factor(x=1, y=1) - writer = PyPDF2.PdfFileWriter() - for x in range(0, calc.calculate_pages_needed(current_layout, n_up_factor)): - page = copy(assembled_collage) - # cf. https://stackoverflow.com/questions/52315259/pypdf2-cant-add-multiple-cropped-pages# + output = Pdf.new() + output.copy_foreign(collage.Root) # TODO must Root be updated if new pages are added? + for i in range(0, calc.calculate_pages_needed(current_layout, n_up_factor)): + page = output.copy_foreign(collage.pages[0]) lowerleft: core.Point = _calculate_lowerleft_point(lowerleft_factor, n_up_factor, pagesize) upperright: core.Point = _calculate_upperright_point(upperright_factor, n_up_factor, current_layout, pagesize) @@ -71,11 +69,10 @@ def _create_output_files(assembled_collage: PyPDF2.pdf.PageObject, colsleft = _calculate_colsrows_left(current_layout.columns, upperright_factor.x, n_up_factor.x) lowerleft_factor, upperright_factor = _adjust_factors(lowerleft_factor, upperright_factor, colsleft) - page.cropBox.lowerLeft = (lowerleft.x, lowerleft.y) - page.cropBox.upperRight = (upperright.x, upperright.y) - writer.addPage(page) + page.CropBox = [lowerleft.x, lowerleft.y, upperright.x, upperright.y] + output.pages.append(page) - return writer + return output def _calculate_colsrows_left(layout_element: int, factor: int, nup_factor: int) -> int: diff --git a/nobubo/output.py b/nobubo/output.py index bb52b22..a22ae29 100644 --- a/nobubo/output.py +++ b/nobubo/output.py @@ -15,17 +15,17 @@ # You should have received a copy of the GNU Affero General Public License # along with Nobubo. If not, see . import PyPDF2 +from pikepdf import Pdf import pathlib import sys from nobubo import core, calc -def write_chops(pypdf2_writer: PyPDF2.PdfFileWriter, output_path: pathlib.Path): +def write_chops(collage: Pdf, output_path: pathlib.Path): print("Writing file...") try: - with open(output_path, "wb") as output: - pypdf2_writer.write(output) + collage.save(output_path) except OSError as e: print(f"While writing the file, this error occurred:\n{e}") sys.exit(1) @@ -33,10 +33,7 @@ def write_chops(pypdf2_writer: PyPDF2.PdfFileWriter, output_path: pathlib.Path): def write_collage(temp_collage_paths: [pathlib.Path], output_properties: core.OutputProperties): for counter, collage_path in enumerate(temp_collage_paths): - writer = PyPDF2.PdfFileWriter() new_outputpath = calc.generate_new_outputpath(output_properties.output_path, counter) - with collage_path.open("rb") as collagefile: - reader = PyPDF2.PdfFileReader(collagefile, strict=False) - writer.addPage(reader.getPage(0)) - write_chops(writer, new_outputpath) + temp_collage = Pdf.open(collage_path) + temp_collage.save(new_outputpath) print(f"Collage written to {new_outputpath}. Enjoy your sewing :)") \ No newline at end of file From ef2b07bbb4b53fa354c04ec0faace33600d8e049 Mon Sep 17 00:00:00 2001 From: bytinbit Date: Mon, 4 Jan 2021 18:09:58 +0100 Subject: [PATCH 2/6] update requirements and setup.py --- requirements.txt | 3 ++- setup.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 60a3076..fc06e0b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ click >= 7.1.2 -PyPDF2 >= 1.26.0 +pikepdf >= 1.19.3 + diff --git a/setup.py b/setup.py index f777157..36c1cd1 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ license="AGPLv3", packages=setuptools.find_packages(), python_requires=">=3.7", - install_requires=["click", "PyPDF2"] + install_requires=["click", "pikepdf"], classifiers=[ "Topic :: Printing", "Topic :: Utilities", From 9248689b59eb76259901eb639927adf226c888e5 Mon Sep 17 00:00:00 2001 From: bytinbit Date: Mon, 4 Jan 2021 18:17:00 +0100 Subject: [PATCH 3/6] thank cfcurtis for the need to copy Root --- nobubo/disassembly.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nobubo/disassembly.py b/nobubo/disassembly.py index 6490056..c236858 100644 --- a/nobubo/disassembly.py +++ b/nobubo/disassembly.py @@ -59,6 +59,7 @@ def _create_output_files(collage: Pdf, output = Pdf.new() output.copy_foreign(collage.Root) # TODO must Root be updated if new pages are added? + # Root must be copied too, not only the page: thanks to https://github.com/cfcurtis/sewingutils for i in range(0, calc.calculate_pages_needed(current_layout, n_up_factor)): page = output.copy_foreign(collage.pages[0]) From 3b8101393e785f64b5d4d5a4a3529dd6c9a48e1b Mon Sep 17 00:00:00 2001 From: bytinbit Date: Tue, 5 Jan 2021 18:36:02 +0100 Subject: [PATCH 4/6] switch from PyPDF2 to pikepdf --- nobubo/calc.py | 26 +++++++++++++------------- nobubo/disassembly.py | 2 +- nobubo/output.py | 1 - 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/nobubo/calc.py b/nobubo/calc.py index 3049515..d43e53f 100644 --- a/nobubo/calc.py +++ b/nobubo/calc.py @@ -24,7 +24,7 @@ from dataclasses import dataclass from typing import List -import PyPDF2 +import pikepdf from nobubo import core @@ -41,15 +41,13 @@ class Factor: def parse_cli_input(input_layout: (int, int, int), output_layout_cli: str, print_margin: int, reverse_assembly: bool, input_path: str, output_path: str ) -> (core.InputProperties, core.OutputProperties): - with open(pathlib.Path(input_path), "rb") as inputfile: - reader = PyPDF2.PdfFileReader(inputfile, strict=False) - - width, height = calculate_page_dimensions( - reader.getPage(1)) # first page (getPage(0)) may contain overview + with pikepdf.open(pathlib.Path(input_path)) as inputfile: + # first page (getPage(0)) may contain overview, so get second one + width, height = calculate_page_dimensions(inputfile.pages[1]) input_properties = core.InputProperties( input_filepath=pathlib.Path(input_path), output_path=pathlib.Path(output_path), - number_of_pages=reader.getNumPages(), + number_of_pages=len(inputfile.pages), pagesize=core.PageSize(width=width, height=height), layout=parse_input_layouts(input_layout), reverse_assembly=reverse_assembly) @@ -57,7 +55,7 @@ def parse_cli_input(input_layout: (int, int, int), output_layout_cli: str, print output_properties = core.OutputProperties(output_path=pathlib.Path(output_path), output_layout=parse_output_layout(output_layout_cli, print_margin), ) - return input_properties, output_properties + return input_properties, output_properties def parse_input_layouts(input_layout: (int, int, int)) ->[core.Layout]: @@ -83,15 +81,17 @@ def calculate_pages_needed(layout: core.Layout, n_up_factor: Factor) -> int: return math.ceil(layout.columns/n_up_factor.x) * math.ceil(layout.rows/n_up_factor.y) -def calculate_page_dimensions(page: PyPDF2.pdf.PageObject) -> (float, float): +def calculate_page_dimensions(page: pikepdf.Page) -> (float, float): """ Calculates the x, y value for the offset in default user space units as defined in the pdf standard. - Uses the cropBox value, since this is the area visible to the printer. - :param page: A pattern page. + :param page: A PDF page. :return: list with x, y value. """ - return round(float(page.cropBox[2])-float(page.cropBox[0]), 2), \ - round(float(page.cropBox[3])-float(page.cropBox[1]), 2) + if not hasattr(page, "CropBox"): + box = page.MediaBox + else: + box = page.CropBox + return round(float(box[2])-float(box[0]), 2), round(float(box[3])-float(box[1]), 2) def convert_to_userspaceunits(width_height: [int, int]) -> core.PageSize: diff --git a/nobubo/disassembly.py b/nobubo/disassembly.py index c236858..1d1d8fe 100644 --- a/nobubo/disassembly.py +++ b/nobubo/disassembly.py @@ -59,7 +59,7 @@ def _create_output_files(collage: Pdf, output = Pdf.new() output.copy_foreign(collage.Root) # TODO must Root be updated if new pages are added? - # Root must be copied too, not only the page: thanks to https://github.com/cfcurtis/sewingutils + # Root must be copied too, not only the page: thanks to https://github.com/cfcurtis/sewingutils for this! for i in range(0, calc.calculate_pages_needed(current_layout, n_up_factor)): page = output.copy_foreign(collage.pages[0]) diff --git a/nobubo/output.py b/nobubo/output.py index a22ae29..c74b1f5 100644 --- a/nobubo/output.py +++ b/nobubo/output.py @@ -14,7 +14,6 @@ # # You should have received a copy of the GNU Affero General Public License # along with Nobubo. If not, see . -import PyPDF2 from pikepdf import Pdf import pathlib import sys From ea0e6bd4d336aa876d20aad4453804d87a056887 Mon Sep 17 00:00:00 2001 From: bytinbit Date: Tue, 5 Jan 2021 18:36:15 +0100 Subject: [PATCH 5/6] adjust tests for pikepdf --- tests/conftest.py | 25 +++++++++++++------------ tests/test_calc.py | 3 --- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 2ff31cd..12312ea 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,6 @@ import pathlib -import PyPDF2 +# import PyPDF2 +import pikepdf import pytest import textract @@ -15,22 +16,27 @@ def __init__(self, outputdir: pathlib.Path) -> None: def read(self): for filepath in self.outputdir.glob("*.pdf"): - file = open(filepath, "rb") + file = pikepdf.open(filepath) self._files.append(file) - self.readers[filepath.name] = PyPDF2.PdfFileReader(file) + self.readers[filepath.name] = file return sorted(self.readers.keys()) def pagesize(self, filename: str, pagenumber: int=0) -> [float, float]: reader = self.readers[filename] - page = reader.getPage(pagenumber) - return [round(float(page.cropBox[2])-float(page.cropBox[0]), 2), round(float(page.cropBox[3])-float(page.cropBox[1]), 2)] + page = reader.pages[pagenumber] + if not hasattr(page, "CropBox"): + box = page.MediaBox + else: + box = page.CropBox + return [round(float(box[2])-float(box[0]), 2), + round(float(box[3])-float(box[1]), 2)] def pagecount(self, filename: str) -> int: reader = self.readers[filename] - return reader.getNumPages() + return len(reader.pages) # TODO is there a better way to check the order of the pages? - def pages_order(self, filepath: str, pageamount: int=1) -> [str, str]: + def pages_order(self, filepath: str) -> [str, str]: text = str(textract.process(filepath, encoding="utf-8"), "utf-8").split("\n\n") # texteract finds ascii value '\f' (form feed, \x0c) that must be removed res = list(filter(lambda a: a not in '\x0c', text)) @@ -80,11 +86,6 @@ def two_overviews() -> [core.Layout, core.Layout]: return [first, second] -@pytest.fixture() -def one_pdf_page_same_boxes() -> PyPDF2.pdf.PageObject: - return PyPDF2.pdf.PageObject.createBlankPage(None, 483.307, 729.917) - - @pytest.fixture() def n_up_factor_a0() -> calc.Factor: return calc.Factor(x=4, y=4) diff --git a/tests/test_calc.py b/tests/test_calc.py index 3a3e70e..1021714 100644 --- a/tests/test_calc.py +++ b/tests/test_calc.py @@ -16,9 +16,6 @@ def test_calculate_pages_needed_oneoverview_a0_unevenlayout(self, one_overview_u def test_calculate_pages_needed_oneoverview_custom_unevenlayout(self, one_overview_uneven, nup_factor_custom): assert calc.calculate_pages_needed(one_overview_uneven, nup_factor_custom) == 2 - def test_calculate_offset(self, one_pdf_page_same_boxes): - assert calc.calculate_page_dimensions(one_pdf_page_same_boxes) == (483.31, 729.92) - def test_userspaceunits_conversion_a0(self): paper = calc.convert_to_userspaceunits([841, 1189]) assert paper.width == 2383.937 From c091189a21e3bc89133544f3093bc8b87b6f5a07 Mon Sep 17 00:00:00 2001 From: bytinbit Date: Mon, 11 Jan 2021 15:23:45 +0100 Subject: [PATCH 6/6] update version in setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 36c1cd1..a9c17c3 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name="nobubo-bytinbit", - version="1.1.0", + version="1.2.0", description="Nobubo assembles a digital pdf sewing pattern and chops it into a desired output size to be printed.", long_description=long_description, long_description_content_type="text/markdown",