# Patch summary (free-form preamble placed before the first `diff --git` header).
#
# This patch touches three files:
#   1. mupdf/libmupdf.wasm            - existing binary replaced (index 5dcb27a..3db047b).
#   2. tests/assets/fti_filing_p25.pdf - new binary test asset (index 0000000..8a59149).
#   3. tests/module/importPdfText.spec.js - one hunk adding 15 lines (6 -> 21 lines at line 247).
#
# The hunk inserts a new suite, 'Check that PDF imports split words correctly.',
# between the existing 'split lines' suite and the 'line baselines' suite.
# The suite's single test:
#   - imports `${ASSETS_PATH_KARMA}/fti_filing_p25.pdf` via scribe.importFiles;
#   - asserts scribe.data.ocr.active[0].lines[4].words[0].text === '☒';
#   - asserts scribe.data.ocr.active[0].lines[4].words[1].text === 'ANNUAL',
#     i.e. the checkbox glyph and the following word are imported as two separate words.
# An after() hook calls scribe.terminate().
# Timeouts used: this.timeout(10000) in the suite body, .timeout(10000) on the test,
# and .timeout(120000) chained on describe(), the same trailing call seen on the preceding suite.
#
# NOTE(review): presumably the rebuilt libmupdf.wasm carries the word-splitting change
# this test exercises; the binary diff does not show that - confirm against the mupdf build.
# NOTE(review): the added lines are tied to the hunk header counts and the blob hashes on
# the `index` line; regenerate the patch rather than hand-editing them.
diff --git a/mupdf/libmupdf.wasm b/mupdf/libmupdf.wasm index 5dcb27a..3db047b 100644 Binary files a/mupdf/libmupdf.wasm and b/mupdf/libmupdf.wasm differ diff --git a/tests/assets/fti_filing_p25.pdf b/tests/assets/fti_filing_p25.pdf new file mode 100644 index 0000000..8a59149 Binary files /dev/null and b/tests/assets/fti_filing_p25.pdf differ diff --git a/tests/module/importPdfText.spec.js b/tests/module/importPdfText.spec.js index f86d7fa..7f4a3c2 100644 --- a/tests/module/importPdfText.spec.js +++ b/tests/module/importPdfText.spec.js @@ -247,6 +247,21 @@ describe('Check that PDF imports split lines correctly.', function () { }); }).timeout(120000); +describe('Check that PDF imports split words correctly.', function () { + this.timeout(10000); + + it('Should correctly split words not separated by space or any character defined in may_add_space', async () => { + await scribe.importFiles([`${ASSETS_PATH_KARMA}/fti_filing_p25.pdf`]); + + assert.strictEqual(scribe.data.ocr.active[0].lines[4].words[0].text, '☒'); + assert.strictEqual(scribe.data.ocr.active[0].lines[4].words[1].text, 'ANNUAL'); + }).timeout(10000); + + after(async () => { + await scribe.terminate(); + }); +}).timeout(120000); + describe('Check that line baselines are imported correctly.', function () { this.timeout(10000);