From 0dd880122d1fb0a40dbab7539eff2362c9178a2f Mon Sep 17 00:00:00 2001 From: Balearica Date: Mon, 9 Sep 2024 20:21:16 -0700 Subject: [PATCH] Updated PDFs to render in higher resolution --- js/containers/imageContainer.js | 2 +- js/extractPDFText.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/js/containers/imageContainer.js b/js/containers/imageContainer.js index c9fed34..9922279 100644 --- a/js/containers/imageContainer.js +++ b/js/containers/imageContainer.js @@ -525,7 +525,7 @@ export class ImageCache { // For reasons that are unclear, a small number of pages have been rendered into massive files // so a hard-cap on resolution must be imposed. - const pageDPI = ImageCache.pdfDims300.map((x) => 300 * 2000 / x.width, 2000); + const pageDPI = ImageCache.pdfDims300.map((x) => 300 * Math.min(x.width, 3500) / x.width); // In addition to capping the resolution, also switch the width/height ImageCache.pdfDims300.forEach((x, i) => { diff --git a/js/extractPDFText.js b/js/extractPDFText.js index b4359d4..da2a78d 100644 --- a/js/extractPDFText.js +++ b/js/extractPDFText.js @@ -21,7 +21,7 @@ const extractInternalPDFTextRaw = async () => { }; const stextArr = /** @type {Array} */ ([]); - const pageDPI = ImageCache.pdfDims300.map((x) => 300 * 2000 / x.width, 2000); + const pageDPI = ImageCache.pdfDims300.map((x) => 300 * Math.min(x.width, 3500) / x.width); const resArr = pageDPI.map(async (x, i) => { // While using `pageTextJSON` would save some parsing, unfortunately that format only includes line-level granularity. // The XML format is the only built-in mupdf format that includes character-level granularity.