From 0f16bb582b95942d20900ac032ca9dcba8f545b7 Mon Sep 17 00:00:00 2001 From: Balearica Date: Mon, 28 Oct 2024 04:50:38 -0700 Subject: [PATCH] Prevented calcSuppFontInfo from running with offscreen text --- js/fontSupp.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/js/fontSupp.js b/js/fontSupp.js index d0947cc..63a0dac 100644 --- a/js/fontSupp.js +++ b/js/fontSupp.js @@ -86,6 +86,9 @@ export const calcSuppFontInfo = async (ocrArr) => { if (word.font) { if (skipFonts.has(word.font)) { continue; + // Printing words off screen is a common method of hiding text in PDFs. + } else if (word.bbox.left < 0 || word.bbox.top < 0 || word.bbox.right > page.dims.width || word.bbox.bottom > page.dims.height) { + continue; } else if (!calcFonts.has(word.font)) { const sansSerifUnknown = determineSansSerif(word.font) === 'Default'; if (sansSerifUnknown || !word.visualCoords) {