From 2977fd58baccc75f5e9047de4291857d1474b956 Mon Sep 17 00:00:00 2001 From: Balearica Date: Sun, 1 Sep 2024 23:55:21 -0700 Subject: [PATCH] Reworked font storage and optimization to improve performance --- js/containers/app.js | 2 - js/containers/dataContainer.js | 6 -- js/containers/fontContainer.js | 99 +++++++++++------ js/export/exportPDF.js | 13 ++- js/fontContainerMain.js | 191 +++++++++++++++------------------ js/fontEval.js | 172 ++++++++++++----------------- js/generalWorkerMain.js | 15 ++- js/import/import.js | 11 +- js/recognizeConvert.js | 4 +- js/worker/compareOCRModule.js | 30 +++--- js/worker/generalWorker.js | 22 ++-- js/worker/renderWordCanvas.js | 4 +- scribe.js | 2 +- tests/cli/cli.spec.js | 3 +- tests/module/recognize.spec.js | 10 +- 15 files changed, 285 insertions(+), 299 deletions(-) diff --git a/js/containers/app.js b/js/containers/app.js index 0f1c3e1..e227a73 100644 --- a/js/containers/app.js +++ b/js/containers/app.js @@ -5,8 +5,6 @@ export class opt { static extractText = false; - static enableOpt = false; - static enableUpscale = false; static ignorePunct = false; diff --git a/js/containers/dataContainer.js b/js/containers/dataContainer.js index b8429bd..b15ec4d 100644 --- a/js/containers/dataContainer.js +++ b/js/containers/dataContainer.js @@ -79,12 +79,6 @@ export const pageMetricsArr = []; export class DebugData { /** @type {{[key: string]: Array> | undefined}} */ static debugImg = {}; - - /** @type {?Awaited>} */ - static evalRaw; - - /** @type {?Awaited>} */ - static evalOpt; } /** @type {Array>>} */ diff --git a/js/containers/fontContainer.js b/js/containers/fontContainer.js index 06cc7f9..d7ecee0 100644 --- a/js/containers/fontContainer.js +++ b/js/containers/fontContainer.js @@ -225,15 +225,9 @@ export class FontCont { /** @type {?FontContainer} */ static raw = null; - /** @type {?FontContainer} */ - static optInitial = null; - /** @type {?FontContainer} */ static opt = null; - /** @type {?FontContainer} */ - static active = null; - /** @type {?FontContainer} */ static export = null; @@ -242,30 +236,56 @@ export class FontCont { chi_sim: null, }; + /** Optimized fonts will be used when believed to improve quality. */ + static enableOpt = false; + + /** Optimized fonts will always be used when they exist, even if believed to reduce quality. */ + static forceOpt = false; + + /** @type {?Awaited>} */ + static rawMetrics = null; + + /** @type {?Awaited>} */ + static optMetrics = null; + static defaultFontName = 'SerifDefault'; static serifDefaultName = 'NimbusRomNo9L'; static sansDefaultName = 'NimbusSans'; - static loadedBuiltInRawWorker = false; - - static loadedBuiltInOptWorker = false; - /** @type {?('latin'|'all')} */ static glyphSet = null; /** - * Get raw/opt/active font, and throw exception if it does not exist. - * This method only exists for type inference purposes, as raw/opt/active may be accessed directly, but may be `null`. - * This method should therefore only be used in cases where an exception on `null` is a desirable behavior. - * @param {('raw'|'opt'|'active'|'optInitial')} container - * @returns {FontContainer} + * Decide whether to use the optimized version of a font family. + * Note that even when this function returns `true`, optimized versions of every style will not exist. + * @param {string} family - Font family name. */ - static getContainer = (container) => { - const fontRes = FontCont[container]; - if (!fontRes) throw new Error(`${container} font container does not exist.`); - return fontRes; + static useOptFamily = (family) => { + const raw = FontCont.raw?.[family]?.normal; + if (!raw) return false; + const opt = FontCont.opt?.[family]?.normal; + if (opt && FontCont.forceOpt) { + return true; + // If optimized fonts are enabled (but not forced), the optimized version of a font will be used if: + // (1) The optimized version exists + // (2) The optimized version has a better metric (so quality should improve). + // (3) The optimized version of the default sans/serif font also has a better metric. + // This last condition avoids font optimization being enabled in the UI when it only improves an unused font. + } if (opt && FontCont.enableOpt) { + const defaultFamily = raw.type === 'serif' ? FontCont.serifDefaultName : FontCont.sansDefaultName; + + const rawMetricDefault = FontCont.rawMetrics?.[defaultFamily]; + const optMetricDefault = FontCont.optMetrics?.[defaultFamily]; + + const rawMetric = FontCont.rawMetrics?.[family]; + const optMetric = FontCont.optMetrics?.[family]; + if (rawMetric && optMetric && optMetric < rawMetric && optMetricDefault < rawMetricDefault) { + return true; + } + } + return false; }; /** @@ -275,20 +295,19 @@ export class FontCont { * @param {('Default'|'SansDefault'|'SerifDefault'|string)} family - Font family name. * @param {('normal'|'italic'|'bold'|string)} [style='normal'] * @param {string} [lang='eng'] - * @param {('raw'|'opt'|'active'|'optInitial')} [container='active'] * @returns {FontContainerFont} */ - static getFont = (family, style = 'normal', lang = 'eng', container = 'active') => { - const fontCont = FontCont.getContainer(container); - + static getFont = (family, style = 'normal', lang = 'eng') => { if (lang === 'chi_sim') { if (!FontCont.supp.chi_sim) throw new Error('chi_sim font does not exist.'); return FontCont.supp.chi_sim; } + if (!FontCont.raw) throw new Error('Raw fonts not yet initialized.'); + // Option 1: If we have access to the font, use it. // Option 2: If we do not have access to the font, but it closely resembles a built-in font, use the built-in font. - if (!fontCont?.[family]?.[style]) { + if (!FontCont.raw?.[family]?.[style]) { if (/Times/i.test(family)) { family = 'NimbusRomNo9L'; } else if (/Helvetica/i.test(family)) { @@ -309,7 +328,7 @@ export class FontCont { } // Option 3: If the font still is not identified, use the default sans/serif font. - if (!fontCont?.[family]?.[style]) { + if (!FontCont.raw?.[family]?.[style]) { family = determineSansSerif(family); } @@ -318,31 +337,43 @@ export class FontCont { if (family === 'SerifDefault') family = FontCont.serifDefaultName; if (family === 'SansDefault') family = FontCont.sansDefaultName; - const fontRes = fontCont[family][style]; + + /** @type {FontContainerFont} */ + let fontRes = FontCont.raw?.[family]?.[style]; if (!fontRes) throw new Error(`Font container does not contain ${family} (${style}).`); + + const opt = FontCont.opt?.[family]?.[style]; + const useOpt = FontCont.useOptFamily(family); + if (opt && useOpt) fontRes = opt; + return fontRes; }; /** * * @param {OcrWord} word - * @param {('raw'|'opt'|'active'|'optInitial')} [container='active'] */ - static getWordFont = (word, container = 'active') => { + static getWordFont = (word) => { const wordFontFamily = word.font || FontCont.defaultFontName; - return FontCont.getFont(wordFontFamily, word.style, word.lang, container); + return FontCont.getFont(wordFontFamily, word.style, word.lang); }; + /** + * Reset font container to original state but do not unload default resources. + */ static clear = () => { - FontCont.active = FontCont.raw; - FontCont.optInitial = null; FontCont.opt = null; - FontCont.loadedBuiltInRawWorker = false; - FontCont.loadedBuiltInOptWorker = false; - FontCont.glyphSet = null; + FontCont.rawMetrics = null; + FontCont.optMetrics = null; FontCont.defaultFontName = 'SerifDefault'; FontCont.serifDefaultName = 'NimbusRomNo9L'; FontCont.sansDefaultName = 'NimbusSans'; }; + + static terminate = () => { + FontCont.clear(); + FontCont.raw = null; + FontCont.glyphSet = null; + }; } diff --git a/js/export/exportPDF.js b/js/export/exportPDF.js index 4aa5c07..37ffff5 100644 --- a/js/export/exportPDF.js +++ b/js/export/exportPDF.js @@ -33,9 +33,7 @@ import ocr from '../objects/ocrObjects.js'; */ export async function hocrToPDF(hocrArr, minpage = 0, maxpage = -1, textMode = 'ebook', rotateText = false, rotateBackground = false, dimsLimit = { width: -1, height: -1 }, confThreshHigh = 85, confThreshMed = 75, proofOpacity = 0.8) { - // TODO: Currently, all fonts are added to the PDF, and mupdf removes the unused fonts. - // It would likely be more performant to only add the fonts that are actually used up front. - const exportFontObj = FontCont.getContainer('active'); + if (!FontCont.raw) throw new Error('No fonts loaded.'); if (maxpage === -1) { maxpage = hocrArr.length - 1; @@ -47,13 +45,20 @@ export async function hocrToPDF(hocrArr, minpage = 0, maxpage = -1, textMode = ' // Add fonts // All fonts are added at this step. // The fonts that are not used will be removed by muPDF later. + // TODO: It would likely be more performant to only add the fonts that are actually used up front. let fontI = 0; let objectI = 3; const pdfFonts = {}; /** @type {Array} */ const pdfFontObjStrArr = []; let pdfFontsStr = ''; - for (const [familyKey, familyObj] of Object.entries(exportFontObj)) { + for (const familyKey of Object.keys(FontCont.raw)) { + const useOpt = FontCont.useOptFamily(familyKey); + const familyObj = { + normal: useOpt && FontCont.opt?.[familyKey]?.normal ? FontCont.opt[familyKey].normal : FontCont.raw[familyKey].normal, + italic: useOpt && FontCont.opt?.[familyKey]?.italic ? FontCont.opt[familyKey].italic : FontCont.raw[familyKey].italic, + bold: useOpt && FontCont.opt?.[familyKey]?.bold ? FontCont.opt[familyKey].bold : FontCont.raw[familyKey].bold, + }; pdfFonts[familyKey] = {}; for (const [key, value] of Object.entries(familyObj)) { const font = await value.opentype; diff --git a/js/fontContainerMain.js b/js/fontContainerMain.js index cc88455..105bc9e 100644 --- a/js/fontContainerMain.js +++ b/js/fontContainerMain.js @@ -15,10 +15,7 @@ import { gs } from './generalWorkerMain.js'; */ export async function loadBuiltInFontsRaw(glyphSet = 'latin') { // Return early if the font set is already loaded, or a superset of the requested set is loaded. - if (FontCont.glyphSet === glyphSet || FontCont.glyphSet === 'all' && glyphSet === 'latin') { - console.log('Built-in fonts already loaded.'); - return; - } + if (FontCont.glyphSet === glyphSet || FontCont.glyphSet === 'all' && glyphSet === 'latin') return; FontCont.glyphSet = glyphSet; @@ -114,13 +111,13 @@ export async function loadBuiltInFontsRaw(glyphSet = 'latin') { }; FontCont.raw = await /** @type {FontContainer} */(/** @type {any} */(loadFontsFromSource(srcObj))); - if (!FontCont.active || (!FontCont.active.NimbusSans.normal.opt && !FontCont.active.NimbusRomNo9L.normal.opt)) FontCont.active = FontCont.raw; if (typeof process === 'undefined') { // This assumes that the scheduler `init` method has at least started. if (gs.schedulerReady === null) console.warn('Failed to load fonts to workers as workers have not been initialized yet.'); await gs.schedulerReady; - await setBuiltInFontsWorkers(gs.schedulerInner, true); + // If this is running, presumably a new glyphset is being loaded, so the fonts should be forced to be updated. + await updateFontContWorkerMain({ loadRaw: true }); } return; @@ -155,105 +152,89 @@ export async function loadChiSimFont() { } /** - * - * @param {boolean} enable - * @param {boolean} [useInitial=false] - * @param {boolean} [forceWorkerUpdate=false] - If true, forces the worker to update the font data even if the font data of this type is already loaded. - * This should be used when switching from unvalidated to validated optimized fonts. + * Enable or disable font optimization settings. + * This function is used rather than exposing the settings using the `opt` object, as these settings exist on the font container in both the main thread and the worker threads. + * @param {boolean} enableOpt + * @param {boolean} [forceOpt] */ -export async function enableFontOpt(enable, useInitial = false, forceWorkerUpdate = false) { - // Enable/disable optimized font - if (enable && useInitial && FontCont.optInitial) { - FontCont.active = FontCont.optInitial; - } else if (enable && FontCont.opt) { - FontCont.active = FontCont.opt; - } else { - FontCont.active = FontCont.raw; +export async function enableFontOpt(enableOpt, forceOpt) { + let change = false; + if (enableOpt === true || enableOpt === false) { + if (FontCont.enableOpt !== enableOpt) { + change = true; + FontCont.enableOpt = enableOpt; + } + } + if (forceOpt === true || forceOpt === false) { + if (FontCont.forceOpt !== forceOpt) { + change = true; + FontCont.forceOpt = forceOpt; + } } - // Enable/disable optimized font in workers - if (typeof process === 'undefined') { - await setBuiltInFontsWorkers(gs.schedulerInner, forceWorkerUpdate); - } else { - // const { setFontAll } = await import('./worker/compareOCRModule.js'); - // setFontAll(fontAll); + if (typeof process === 'undefined' && change) { + await updateFontContWorkerMain(); } } /** - * - * @param {*} scheduler - * @param {boolean} [force=false] - If true, forces the worker to update the font data even if the font data of this type is already loaded. + * @param {Object} [params] + * @param {boolean} [params.loadRaw] - By default, raw fonts are loaded if they have not been loaded before. + * Set `loadRaw` to `true` or `false` to force the raw fonts to be loaded or not loaded, respectively. + * @param {boolean} [params.loadOpt] - By default, optimized fonts are loaded if they have not been loaded before. + * Set `loadOpt` to `true` or `false` to force the optimized fonts to be loaded or not loaded, respectively. + * @param {boolean} [params.updateProps] */ -export async function setBuiltInFontsWorkers(scheduler, force = false) { - if (!FontCont.active) { - console.error('Font data not loaded.'); - return; - } - - const opt = FontCont.active.Carlito.normal.opt || FontCont.active.NimbusRomNo9L.normal.opt; - - const loadedBuiltIn = (!opt && FontCont.loadedBuiltInRawWorker) || (opt && FontCont.loadedBuiltInOptWorker); +export async function updateFontContWorkerMain(params = {}) { + const loadRaw = params.loadRaw === true || (params.loadRaw !== false && FontCont.raw && !gs.loadedBuiltInRawWorker); + const loadOpt = params.loadOpt === true || (params.loadOpt !== false && FontCont.opt && !gs.loadedBuiltInOptWorker); // If the active font data is not already loaded, load it now. // This assumes that only one version of the raw/optimized fonts ever exist-- // it does not check whether the current optimized font changed since it was last loaded. - if (!loadedBuiltIn || force) { + for (const [type, load] of [['raw', loadRaw], ['opt', loadOpt]]) { + if (!load) continue; + const resArr = []; - for (let i = 0; i < scheduler.workers.length; i++) { - const worker = scheduler.workers[i]; - const res = worker.loadFontsWorker({ - src: { - Carlito: { - normal: FontCont.active.Carlito.normal.src, - italic: FontCont.active.Carlito.italic.src, - bold: FontCont.active.Carlito.bold.src, - }, - Century: { - normal: FontCont.active.Century.normal.src, - italic: FontCont.active.Century.italic.src, - bold: FontCont.active.Century.bold.src, - }, - Garamond: { - normal: FontCont.active.Garamond.normal.src, - italic: FontCont.active.Garamond.italic.src, - bold: FontCont.active.Garamond.bold.src, - }, - Palatino: { - normal: FontCont.active.Palatino.normal.src, - italic: FontCont.active.Palatino.italic.src, - bold: FontCont.active.Palatino.bold.src, - }, - NimbusRomNo9L: { - normal: FontCont.active.NimbusRomNo9L.normal.src, - italic: FontCont.active.NimbusRomNo9L.italic.src, - bold: FontCont.active.NimbusRomNo9L.bold.src, - }, - NimbusSans: { - normal: FontCont.active.NimbusSans.normal.src, - italic: FontCont.active.NimbusSans.italic.src, - bold: FontCont.active.NimbusSans.bold.src, - }, - }, - opt, - }); - resArr.push(res); + + const input = { opt: type === 'opt', src: {} }; + for (const [key, value] of Object.entries(FontCont[type])) { + if (!value || !value.normal) continue; + input.src[key] = { + normal: value.normal.src, + }; + if (value.italic) input.src[key].italic = value.italic.src; + if (value.bold) input.src[key].bold = value.bold.src; } - await Promise.all(resArr); - // Theoretically this should be changed to use promises to avoid the race condition when `setBuiltInFontsWorkers` is called multiple times quickly and `loadFontsWorker` is still running. - if (opt) { - FontCont.loadedBuiltInOptWorker = true; - } else { - FontCont.loadedBuiltInRawWorker = true; + for (let i = 0; i < gs.schedulerInner.workers.length; i++) { + const worker = gs.schedulerInner.workers[i]; + const res = worker.loadFontsWorker(input); + resArr.push(res); + + // TODO: consider the race condition when `setBuiltInFontsWorkers` is called multiple times quickly and `loadFontsWorker` is still running. + if (type === 'opt') { + gs.loadedBuiltInOptWorker = true; + } else { + gs.loadedBuiltInRawWorker = true; + } } + await Promise.all(resArr); } // Set the active font in the workers to match the active font in `fontAll` const resArr = []; - for (let i = 0; i < scheduler.workers.length; i++) { - const worker = scheduler.workers[i]; - const res = worker.setFontActiveWorker({ opt, sansDefaultName: FontCont.sansDefaultName, serifDefaultName: FontCont.serifDefaultName }); + for (let i = 0; i < gs.schedulerInner.workers.length; i++) { + const worker = gs.schedulerInner.workers[i]; + const res = worker.updateFontContWorker({ + rawMetrics: FontCont.rawMetrics, + optMetrics: FontCont.optMetrics, + sansDefaultName: FontCont.sansDefaultName, + serifDefaultName: FontCont.serifDefaultName, + defaultFontName: FontCont.defaultFontName, + enableOpt: FontCont.enableOpt, + forceOpt: FontCont.forceOpt, + }); resArr.push(res); } await Promise.all(resArr); @@ -261,7 +242,7 @@ export async function setBuiltInFontsWorkers(scheduler, force = false) { /** * WIP: Import fonts embedded in PDFs. - * This function is not currently used. + * This function is out of date and not currently used. * @param {*} scheduler */ export async function setUploadFontsWorker(scheduler) { @@ -295,7 +276,15 @@ export async function setUploadFontsWorker(scheduler) { const opt = FontCont.active.Carlito.normal.opt || FontCont.active.NimbusRomNo9L.normal.opt; for (let i = 0; i < scheduler.workers.length; i++) { const worker = scheduler.workers[i]; - const res = worker.setFontActiveWorker({ opt, sansDefaultName: FontCont.sansDefaultName, serifDefaultName: FontCont.serifDefaultName }); + const res = worker.updateFontContWorker({ + rawMetrics: FontCont.rawMetrics, + optMetrics: FontCont.optMetrics, + sansDefaultName: FontCont.sansDefaultName, + serifDefaultName: FontCont.serifDefaultName, + defaultFontName: FontCont.defaultFontName, + enableOpt: FontCont.enableOpt, + forceOpt: FontCont.forceOpt, + }); resArr.push(res); } await Promise.all(resArr); @@ -321,7 +310,7 @@ export function setDefaultFontAuto(fontMetricsObj) { if (gs.schedulerInner) { for (let i = 0; i < gs.schedulerInner.workers.length; i++) { const worker = gs.schedulerInner.workers[i]; - worker.setDefaultFontNameWorker({ defaultFontName: FontCont.defaultFontName }); + worker.updateFontContWorker({ defaultFontName: FontCont.defaultFontName }); } } } @@ -348,14 +337,8 @@ export async function optimizeFontContainerFamily(fontFamily, fontMetricsObj) { // If there are no statistics to use for optimization, create "optimized" font by simply copying the raw font without modification. // This should only occur when `multiFontMode` is true, but a document contains no sans words or no serif words. - if (!fontMetricsObj[fontMetricsType] || !fontMetricsObj[fontMetricsType][fontFamily.normal.style]) { - const opentypeFontArr = await Promise.all([loadOpentype(fontFamily.normal.src, null), loadOpentype(fontFamily.italic.src, null), loadOpentype(fontFamily.bold.src, null)]); - const normalOptFont = new FontContainerFont(fontFamily.normal.family, fontFamily.normal.style, fontFamily.normal.src, true, opentypeFontArr[0]); - const italicOptFont = new FontContainerFont(fontFamily.italic.family, fontFamily.italic.style, fontFamily.italic.src, true, opentypeFontArr[1]); - const boldOptFont = new FontContainerFont(fontFamily.bold.family, fontFamily.bold.style, fontFamily.bold.src, true, opentypeFontArr[2]); - return { - normal: await normalOptFont, italic: await italicOptFont, bold: await boldOptFont, - }; + if (!fontMetricsObj[fontMetricsType] || !fontMetricsObj[fontMetricsType][fontFamily.normal.style] || fontMetricsObj[fontMetricsType][fontFamily.normal.style].obs < 200) { + return null; } const metricsNormal = fontMetricsObj[fontMetricsType][fontFamily.normal.style]; @@ -365,30 +348,26 @@ export async function optimizeFontContainerFamily(fontFamily, fontMetricsObj) { return new FontContainerFont(fontFamily.normal.family, fontFamily.normal.style, x.fontData, true, font); }); - const metricsItalic = fontMetricsObj[fontMetricsType][fontFamily.italic.style]; - /** @type {FontContainerFont|Promise} */ - let italicOptFont; + const metricsItalic = fontMetricsObj[fontMetricsType][fontFamily.italic.style] && fontMetricsObj[fontMetricsType][fontFamily.italic.style].obs >= 200; + /** @type {?FontContainerFont|Promise} */ + let italicOptFont = null; if (metricsItalic) { italicOptFont = gs.scheduler.optimizeFont({ fontData: fontFamily.italic.src, fontMetricsObj: metricsItalic, style: fontFamily.italic.style }) .then(async (x) => { const font = await loadOpentype(x.fontData, x.kerningPairs); return new FontContainerFont(fontFamily.italic.family, fontFamily.italic.style, x.fontData, true, font); }); - } else { - const font = await loadOpentype(fontFamily.italic.src, null); - italicOptFont = new FontContainerFont(fontFamily.italic.family, fontFamily.italic.style, fontFamily.italic.src, true, font); } // Bold fonts are not optimized, as we currently have no accurate way to determine if characters are bold within OCR, so do not have bold metrics. - const boldOptFont = loadOpentype(fontFamily.bold.src, null).then((opentypeFont) => new FontContainerFont(fontFamily.bold.family, fontFamily.bold.style, fontFamily.bold.src, true, opentypeFont)); - return { - normal: await normalOptFont, italic: await italicOptFont, bold: await boldOptFont, + normal: await normalOptFont, italic: await italicOptFont, bold: null, }; } /** * Optimize all fonts. + * If a font cannot be optimized, then the raw font is returned. * @param {Object} fontPrivate * @param {Object.} fontMetricsObj */ @@ -402,6 +381,8 @@ export async function optimizeFontContainerAll(fontPrivate, fontMetricsObj) { const results = await Promise.all([carlitoPromise, centuryPromise, garamondPromise, palatinoPromise, nimbusRomNo9LPromise, nimbusSansPromise]); + if (results.every((x) => x === null)) return null; + return { Carlito: results[0], Century: results[1], diff --git a/js/fontEval.js b/js/fontEval.js index a811849..4a5c8be 100644 --- a/js/fontEval.js +++ b/js/fontEval.js @@ -1,16 +1,16 @@ -import { DebugData, fontMetricsObj, pageMetricsArr } from './containers/dataContainer.js'; +import { fontMetricsObj, pageMetricsArr } from './containers/dataContainer.js'; import { FontCont } from './containers/fontContainer.js'; import { ImageCache } from './containers/imageContainer.js'; import { - enableFontOpt, loadBuiltInFontsRaw, optimizeFontContainerAll, setDefaultFontAuto, + updateFontContWorkerMain, } from './fontContainerMain.js'; import { gs } from './generalWorkerMain.js'; /** * Evaluate how well a font matches the provided array of pages. - * @param {FontContainerFamily} font + * @param {string} font - Name of font family. * @param {Array} pageArr * @param {boolean} opt - Whether to use optimized fonts. * @param {number} n - Number of words to compare @@ -33,7 +33,7 @@ export async function evalPagesFont(font, pageArr, opt, n = 500) { const { evalPageFont } = await import('./worker/compareOCRModule.js'); res = await evalPageFont({ - font: font.normal.family, + font, page: pageArr[i], binaryImage: imageI, pageMetricsObj: pageMetricsArr[i], @@ -42,7 +42,7 @@ export async function evalPagesFont(font, pageArr, opt, n = 500) { // Browser case } else { res = await gs.scheduler.evalPageFont({ - font: font.normal.family, + font, page: pageArr[i], binaryImage: imageI, pageMetricsObj: pageMetricsArr[i], @@ -62,26 +62,28 @@ export async function evalPagesFont(font, pageArr, opt, n = 500) { * @param {boolean} opt - Whether to use optimized fonts. */ export async function evaluateFonts(pageArr, opt) { - const fontActive = FontCont.getContainer('active'); - - const debug = false; + const evalCarlito = !!(opt ? FontCont.opt?.Carlito : FontCont.raw?.Carlito); + const evalNimbusSans = !!(opt ? FontCont.opt?.NimbusSans : FontCont.raw?.NimbusSans); + const evalCentury = !!(opt ? FontCont.opt?.Century : FontCont.raw?.Century); + const evalPalatino = !!(opt ? FontCont.opt?.Palatino : FontCont.raw?.Palatino); + const evalGaramond = !!(opt ? FontCont.opt?.Garamond : FontCont.raw?.Garamond); + const evalNimbusRomNo9L = !!(opt ? FontCont.opt?.NimbusRomNo9L : FontCont.raw?.NimbusRomNo9L); // The browser version runs in parallel using workers, however the Node.js version runs sequentially, // as the canvas package does not support workers, and trying to run in parallel causes problems. // The logic is the same in both versions. - let sansMetrics; - let serifMetrics; + let fontMetricsTmp; if (typeof process === 'undefined') { const fontMetricsPromises = { - carlito: evalPagesFont(fontActive.Carlito, pageArr, opt), - nimbusSans: evalPagesFont(fontActive.NimbusSans, pageArr, opt), - century: evalPagesFont(fontActive.Century, pageArr, opt), - palatino: evalPagesFont(fontActive.Palatino, pageArr, opt), - garamond: evalPagesFont(fontActive.Garamond, pageArr, opt), - nimbusRomNo9L: evalPagesFont(fontActive.NimbusRomNo9L, pageArr, opt), + carlito: evalCarlito ? evalPagesFont('Carlito', pageArr, opt) : null, + nimbusSans: evalNimbusSans ? evalPagesFont('NimbusSans', pageArr, opt) : null, + century: evalCentury ? evalPagesFont('Century', pageArr, opt) : null, + palatino: evalPalatino ? evalPagesFont('Palatino', pageArr, opt) : null, + garamond: evalGaramond ? evalPagesFont('Garamond', pageArr, opt) : null, + nimbusRomNo9L: evalNimbusRomNo9L ? evalPagesFont('NimbusRomNo9L', pageArr, opt) : null, }; - const fontMetrics = { + fontMetricsTmp = { carlito: await fontMetricsPromises.carlito, nimbusSans: await fontMetricsPromises.nimbusSans, century: await fontMetricsPromises.century, @@ -89,46 +91,39 @@ export async function evaluateFonts(pageArr, opt) { garamond: await fontMetricsPromises.garamond, nimbusRomNo9L: await fontMetricsPromises.nimbusRomNo9L, }; - - sansMetrics = { - Carlito: fontMetrics.carlito.metricTotal / fontMetrics.carlito.wordsTotal, - NimbusSans: fontMetrics.nimbusSans.metricTotal / fontMetrics.nimbusSans.wordsTotal, - }; - - serifMetrics = { - Century: fontMetrics.century.metricTotal / fontMetrics.century.wordsTotal, - Palatino: fontMetrics.palatino.metricTotal / fontMetrics.palatino.wordsTotal, - Garamond: fontMetrics.garamond.metricTotal / fontMetrics.garamond.wordsTotal, - NimbusRomNo9L: fontMetrics.nimbusRomNo9L.metricTotal / fontMetrics.nimbusRomNo9L.wordsTotal, - }; } else { - const fontMetrics = { - Carlito: await evalPagesFont(fontActive.Carlito, pageArr, opt), - NimbusSans: await evalPagesFont(fontActive.NimbusSans, pageArr, opt), - Century: await evalPagesFont(fontActive.Century, pageArr, opt), - Palatino: await evalPagesFont(fontActive.Palatino, pageArr, opt), - Garamond: await evalPagesFont(fontActive.Garamond, pageArr, opt), - NimbusRomNo9L: await evalPagesFont(fontActive.NimbusRomNo9L, pageArr, opt), + fontMetricsTmp = { + carlito: evalCarlito ? await evalPagesFont('Carlito', pageArr, opt) : null, + nimbusSans: evalNimbusSans ? await evalPagesFont('NimbusSans', pageArr, opt) : null, + century: evalCentury ? await evalPagesFont('Century', pageArr, opt) : null, + palatino: evalPalatino ? await evalPagesFont('Palatino', pageArr, opt) : null, + garamond: evalGaramond ? await evalPagesFont('Garamond', pageArr, opt) : null, + nimbusRomNo9L: evalNimbusRomNo9L ? await evalPagesFont('NimbusRomNo9L', pageArr, opt) : null, }; + } - sansMetrics = { - Carlito: fontMetrics.Carlito.metricTotal / fontMetrics.Carlito.wordsTotal, - NimbusSans: fontMetrics.NimbusSans.metricTotal / fontMetrics.NimbusSans.wordsTotal, - }; + const fontMetrics = { + Carlito: fontMetricsTmp.carlito ? fontMetricsTmp.carlito.metricTotal / fontMetricsTmp.carlito.wordsTotal : null, + NimbusSans: fontMetricsTmp.nimbusSans ? fontMetricsTmp.nimbusSans.metricTotal / fontMetricsTmp.nimbusSans.wordsTotal : null, + Century: fontMetricsTmp.century ? fontMetricsTmp.century.metricTotal / fontMetricsTmp.century.wordsTotal : null, + Palatino: fontMetricsTmp.palatino ? fontMetricsTmp.palatino.metricTotal / fontMetricsTmp.palatino.wordsTotal : null, + Garamond: fontMetricsTmp.garamond ? fontMetricsTmp.garamond.metricTotal / fontMetricsTmp.garamond.wordsTotal : null, + NimbusRomNo9L: fontMetricsTmp.nimbusRomNo9L ? fontMetricsTmp.nimbusRomNo9L.metricTotal / fontMetricsTmp.nimbusRomNo9L.wordsTotal : null, + }; - serifMetrics = { - Century: fontMetrics.Century.metricTotal / fontMetrics.Century.wordsTotal, - Palatino: fontMetrics.Palatino.metricTotal / fontMetrics.Palatino.wordsTotal, - Garamond: fontMetrics.Garamond.metricTotal / fontMetrics.Garamond.wordsTotal, - NimbusRomNo9L: fontMetrics.NimbusRomNo9L.metricTotal / fontMetrics.NimbusRomNo9L.wordsTotal, - }; - } + return fontMetrics; +} +/** + * + * @param {Awaited>} fontMetrics + */ +const calcBestFonts = (fontMetrics) => { let minKeySans = 'NimbusSans'; let minValueSans = Number.MAX_VALUE; - for (const [key, value] of Object.entries(sansMetrics)) { - if (debug) console.log(`${key} metric: ${String(value)}`); + for (const [key, value] of Object.entries(fontMetrics)) { + if (!['Carlito', 'NimbusSans'].includes(key)) continue; if (value < minValueSans) { minValueSans = value; minKeySans = key; @@ -138,8 +133,8 @@ export async function evaluateFonts(pageArr, opt) { let minKeySerif = 'NimbusRomNo9L'; let minValueSerif = Number.MAX_VALUE; - for (const [key, value] of Object.entries(serifMetrics)) { - if (debug) console.log(`${key} metric: ${String(value)}`); + for (const [key, value] of Object.entries(fontMetrics)) { + if (!['Century', 'Palatino', 'Garamond', 'NimbusRomNo9L'].includes(key)) continue; if (value < minValueSerif) { minValueSerif = value; minKeySerif = key; @@ -147,12 +142,10 @@ export async function evaluateFonts(pageArr, opt) { } return { - sansMetrics, - serifMetrics, minKeySans, minKeySerif, }; -} +}; /** * Runs font optimization and validation. Sets `fontAll` defaults to best fonts, @@ -168,8 +161,6 @@ export async function evaluateFonts(pageArr, opt) { export async function runFontOptimization(ocrArr) { await loadBuiltInFontsRaw(); - const fontRaw = FontCont.getContainer('raw'); - const calculateOpt = fontMetricsObj && Object.keys(fontMetricsObj).length > 0; let enableOptSerif = false; @@ -179,17 +170,9 @@ export async function runFontOptimization(ocrArr) { if (calculateOpt) { setDefaultFontAuto(fontMetricsObj); - optimizeFontContainerAllPromise = optimizeFontContainerAll(fontRaw, fontMetricsObj) + optimizeFontContainerAllPromise = optimizeFontContainerAll(FontCont.raw, fontMetricsObj) .then((res) => { - FontCont.optInitial = res; - - // If no image data exists, then `opt` is set to `optInitial`. - // This behavior exists so that data can be loaded from previous sessions without changing the appearance of the document. - // Arguably, in cases where a user uploads raw OCR data and no images, using the raw font is more prudent than an unvalidated optimized font. - // If this ever comes up in actual usage and is a problem, then the behavior can be changed for that specific case. - if (!ImageCache.inputModes.image && !ImageCache.inputModes.pdf) { - FontCont.opt = { ...FontCont.optInitial }; - } + FontCont.opt = res; }); } @@ -205,60 +188,43 @@ export async function runFontOptimization(ocrArr) { await initCanvasNode(); } - const evalRaw = await evaluateFonts(ocrArr.slice(0, pageNum), false); - DebugData.evalRaw = evalRaw; + FontCont.rawMetrics = await evaluateFonts(ocrArr.slice(0, pageNum), false); + const bestMetricsRaw = calcBestFonts(FontCont.rawMetrics); await optimizeFontContainerAllPromise; - if (calculateOpt && Object.keys(FontCont.optInitial).length > 0) { - // Enable optimized fonts - await enableFontOpt(true, true, true); + if (FontCont.opt && Object.keys(FontCont.opt).length > 0) { + await updateFontContWorkerMain(); + + FontCont.optMetrics = await evaluateFonts(ocrArr.slice(0, pageNum), true); - const evalOpt = await evaluateFonts(ocrArr.slice(0, pageNum), true); - DebugData.evalOpt = evalOpt; + const bestMetricsOpt = calcBestFonts(FontCont.optMetrics); // The default font for both the optimized and unoptimized versions are set to the same font. // This ensures that switching on/off "font optimization" does not change the font, which would be confusing. - if (evalOpt.sansMetrics[evalOpt.minKeySans] < evalRaw.sansMetrics[evalRaw.minKeySans]) { - FontCont.sansDefaultName = evalOpt.minKeySans; + if (FontCont.optMetrics[bestMetricsOpt.minKeySans] < FontCont.rawMetrics[bestMetricsRaw.minKeySans]) { enableOptSans = true; + FontCont.sansDefaultName = bestMetricsOpt.minKeySans; } else { - FontCont.sansDefaultName = evalRaw.minKeySans; + FontCont.sansDefaultName = bestMetricsRaw.minKeySans; } // Repeat for serif fonts - if (evalOpt.serifMetrics[evalOpt.minKeySerif] < evalRaw.serifMetrics[evalRaw.minKeySerif]) { - FontCont.serifDefaultName = evalOpt.minKeySerif; + if (FontCont.optMetrics[bestMetricsOpt.minKeySerif] < FontCont.rawMetrics[bestMetricsRaw.minKeySerif]) { enableOptSerif = true; + FontCont.serifDefaultName = bestMetricsOpt.minKeySerif; } else { - FontCont.serifDefaultName = evalRaw.minKeySerif; - } - - // Create final optimized font object. - // The final optimized font is set to either the initial optimized font or the raw font depending on what fits better. - // Make shallow copy to allow for changing individual fonts without copying the entire object. - FontCont.opt = { ...FontCont.optInitial }; - - if (!enableOptSans) { - FontCont.opt.Carlito = fontRaw.Carlito; - FontCont.opt.NimbusSans = fontRaw.NimbusSans; - } - - if (!enableOptSerif) { - FontCont.opt.Century = fontRaw.Century; - FontCont.opt.Garamond = fontRaw.Garamond; - FontCont.opt.NimbusRomNo9L = fontRaw.NimbusRomNo9L; - FontCont.opt.Palatino = fontRaw.Palatino; + FontCont.serifDefaultName = bestMetricsRaw.minKeySerif; } } else { - FontCont.sansDefaultName = evalRaw.minKeySans; - FontCont.serifDefaultName = evalRaw.minKeySerif; + FontCont.sansDefaultName = bestMetricsRaw.minKeySans; + FontCont.serifDefaultName = bestMetricsRaw.minKeySerif; } - } - // Set final fonts in workers - await enableFontOpt(true, false, true); + FontCont.enableOpt = enableOptSerif || enableOptSans; - const enableOpt = enableOptSerif || enableOptSans; + // Send updated state to all workers. + await updateFontContWorkerMain(); + } - return enableOpt; + return FontCont.enableOpt; } diff --git a/js/generalWorkerMain.js b/js/generalWorkerMain.js index 65cd3ee..a06ca35 100644 --- a/js/generalWorkerMain.js +++ b/js/generalWorkerMain.js @@ -94,8 +94,7 @@ export async function initGeneralWorker() { obj.renderPageStaticImp = wrap('renderPageStaticImp'); obj.loadFontsWorker = wrap('loadFontsWorker'); - obj.setFontActiveWorker = wrap('setFontActiveWorker'); - obj.setDefaultFontNameWorker = wrap('setDefaultFontNameWorker'); + obj.updateFontContWorker = wrap('updateFontContWorker'); obj.terminate = () => worker.terminate(); @@ -171,6 +170,12 @@ export class gs { // so storing as a promise would require a lot of refactoring for little benefit. // The scheduler is a singleton that is only set up once, so there is no need to store it in a promise as long as setup race conditions are avoided. + /** Whether built-in fonts have been loaded in workers. */ + static loadedBuiltInRawWorker = false; + + /** Whether optimized fonts have been loaded in workers. */ + static loadedBuiltInOptWorker = false; + /** @type {?GeneralScheduler} */ static scheduler = null; @@ -289,7 +294,12 @@ export class gs { return /** @type {GeneralScheduler} */ (gs.scheduler); }; + static clear = () => { + gs.loadedBuiltInOptWorker = false; + }; + static terminate = async () => { + gs.clear(); gs.scheduler = null; await gs.schedulerInner.terminate(); gs.schedulerInner = null; @@ -297,5 +307,6 @@ export class gs { gs.schedulerReady = null; gs.resReadyTesseract = null; gs.schedulerReadyTesseract = null; + gs.loadedBuiltInRawWorker = false; }; } diff --git a/js/import/import.js b/js/import/import.js index ac3c68b..d9cff49 100644 --- a/js/import/import.js +++ b/js/import/import.js @@ -339,13 +339,12 @@ export async function importFiles(files, options = {}) { // not simply because the user disabled optimization in the view settings. // If no `enableOpt` property exists but metrics are present, then optimization is enabled. if (ocrData.enableOpt === 'false') { - opt.enableOpt = false; + FontCont.enableOpt = false; } else { await fontPromise; - const fontRaw = FontCont.getContainer('raw'); - if (!fontRaw) throw new Error('Raw font data not found.'); - FontCont.opt = await optimizeFontContainerAll(fontRaw, fontMetricsObj); - opt.enableOpt = true; + if (!FontCont.raw) throw new Error('Raw font data not found.'); + FontCont.opt = await optimizeFontContainerAll(FontCont.raw, fontMetricsObj); + FontCont.enableOpt = true; await enableFontOpt(true); } } @@ -442,7 +441,7 @@ export async function importFiles(files, options = {}) { if (!existingOpt && !stextMode) { await checkCharWarn(convertPageWarn); calcFontMetricsFromPages(ocrAll.active); - opt.enableOpt = await runFontOptimization(ocrAll.active); + await runFontOptimization(ocrAll.active); } }); } else if (inputData.pdfMode && (extractPDFTextNative || extractPDFTextOCR)) { diff --git a/js/recognizeConvert.js b/js/recognizeConvert.js index 50640a7..b44b5b9 100644 --- a/js/recognizeConvert.js +++ b/js/recognizeConvert.js @@ -552,7 +552,7 @@ export async function recognize(options = {}) { // Metrics from the LSTM model are so inaccurate they are not worth using. if (oemMode === 'legacy') { calcFontMetricsFromPages(ocrAll['Tesseract Legacy']); - opt.enableOpt = await runFontOptimization(ocrAll['Tesseract Legacy']); + await runFontOptimization(ocrAll['Tesseract Legacy']); } } else if (oemMode === 'combined') { await recognizeAllPages(true, true, true, langs, vanillaMode); @@ -603,7 +603,7 @@ export async function recognize(options = {}) { const pageNum = Math.min(ImageCache.pageCount - 1, 5); await ImageCache.preRenderRange(0, pageNum, true); calcFontMetricsFromPages(ocrAll['Tesseract Combined Temp']); - opt.enableOpt = await runFontOptimization(ocrAll['Tesseract Combined Temp']); + await runFontOptimization(ocrAll['Tesseract Combined Temp']); const oemText = 'Combined'; if (!ocrAll[oemText]) ocrAll[oemText] = Array(inputData.pageCount); diff --git a/js/worker/compareOCRModule.js b/js/worker/compareOCRModule.js index fa16b5b..25d54a9 100644 --- a/js/worker/compareOCRModule.js +++ b/js/worker/compareOCRModule.js @@ -137,10 +137,11 @@ export const initCanvasNode = async () => { } // This function is used before font optimization is complete, so `fontAll.opt` does not exist yet. - if (FontCont.optInitial) { - for (const [key1, value1] of Object.entries(FontCont.optInitial)) { - if (['Default', 'SansDefault', 'SerifDefault'].includes(key1)) continue; + if (FontCont.opt) { + for (const [key1, value1] of Object.entries(FontCont.opt)) { + if (['Default', 'SansDefault', 'SerifDefault'].includes(key1) || !value1) continue; for (const [key2, value2] of Object.entries(value1)) { + if (!value2) continue; await registerFontObj(value2); } } @@ -203,7 +204,7 @@ export async function evalWords({ const binaryImageBit = await getImageBitmap(binaryImage); - if (!FontCont.active) throw new Error('Fonts must be defined before running this function.'); + if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.'); if (!calcCtx) throw new Error('Canvases must be defined before running this function.'); const view = options?.view === undefined ? false : options?.view; @@ -1054,7 +1055,7 @@ export async function evalPageBase({ const binaryImageBit = binaryImage.imageBitmap || await getImageBitmap(binaryImage.src); - if (!FontCont.active) throw new Error('Fonts must be defined before running this function.'); + if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.'); if (!calcCtx) throw new Error('Canvases must be defined before running this function.'); let metricTotal = 0; @@ -1099,23 +1100,25 @@ export async function evalPageBase({ export async function evalPageFont({ page, binaryImage, pageMetricsObj, font, opt = false, }) { - const fontAllActiveSave = FontCont.active; + const enableOptSave = FontCont.enableOpt; + const forceOptSave = FontCont.forceOpt; // Allowing the font to be set here allows for better performance during font optimization compared to using the `enableFontOpt` function. // This is because the `enableFontOpt` function requires a response from the main thread and *every* worker before completing, which leads to non-trivial waiting time. if (opt === true) { - if (!FontCont.opt && !FontCont.optInitial) throw new Error('Optimized fonts requested but not defined.'); - FontCont.active = FontCont.opt || FontCont.optInitial; + if (!FontCont.opt) throw new Error('Optimized fonts requested but not defined.'); + FontCont.forceOpt = true; } else if (opt === false) { if (!FontCont.raw) throw new Error('Raw fonts requested but not defined.'); - FontCont.active = FontCont.raw; + FontCont.enableOpt = false; + FontCont.forceOpt = false; } /** * @param {OcrLine} ocrLineJ */ const transformLineFont = (ocrLineJ) => { - if (!FontCont.active) throw new Error('Fonts must be defined before running this function.'); + if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.'); if (!ocrLineJ.words[0]) { console.log('Line has 0 words, this should not happen.'); @@ -1125,7 +1128,7 @@ export async function evalPageFont({ // If the font is not set for a specific word, whether it is assumed sans/serif will be determined by the default font. const lineFontType = ocrLineJ.words[0].font ? FontCont.getWordFont(ocrLineJ.words[0]).type : FontCont.getFont('Default').type; - if (FontCont.active[font].normal.type !== lineFontType) return null; + if (FontCont.raw[font].normal.type !== lineFontType) return null; const ocrLineJClone = ocr.cloneLine(ocrLineJ); @@ -1140,7 +1143,8 @@ export async function evalPageFont({ page, binaryImage, pageMetricsObj, func: transformLineFont, }); - FontCont.active = fontAllActiveSave; + FontCont.enableOpt = enableOptSave; + FontCont.forceOpt = forceOptSave; return res; } @@ -1175,7 +1179,7 @@ export async function nudgePageBase({ const binaryImageBit = await getImageBitmap(binaryImage); - if (!FontCont.active) throw new Error('Fonts must be defined before running this function.'); + if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.'); if (!calcCtx) throw new Error('Canvases must be defined before running this function.'); let improveCt = 0; diff --git a/js/worker/generalWorker.js b/js/worker/generalWorker.js index e9a5f6a..b35ee68 100644 --- a/js/worker/generalWorker.js +++ b/js/worker/generalWorker.js @@ -346,19 +346,16 @@ async function loadFontsWorker({ src, opt }) { return true; } -async function setFontActiveWorker({ opt, sansDefaultName, serifDefaultName }) { - if (opt === true) { - FontCont.active = FontCont.opt; - } else if (opt === false) { - FontCont.active = FontCont.raw; - } - +async function updateFontContWorker({ + rawMetrics, optMetrics, defaultFontName, sansDefaultName, serifDefaultName, enableOpt, forceOpt, +}) { if (sansDefaultName) FontCont.sansDefaultName = sansDefaultName; if (serifDefaultName) FontCont.serifDefaultName = serifDefaultName; -} - -async function setDefaultFontNameWorker({ defaultFontName }) { - FontCont.defaultFontName = defaultFontName; + if (defaultFontName) FontCont.defaultFontName = defaultFontName; + if (rawMetrics) FontCont.rawMetrics = rawMetrics; + if (optMetrics) FontCont.optMetrics = optMetrics; + if (enableOpt === true || enableOpt === false) FontCont.enableOpt = enableOpt; + if (forceOpt === true || forceOpt === false) FontCont.forceOpt = forceOpt; } async function compareOCRPageImpWrap(args) { @@ -406,8 +403,7 @@ addEventListener('message', async (e) => { // Change state of worker loadFontsWorker, - setFontActiveWorker, - setDefaultFontNameWorker, + updateFontContWorker, })[func](args) .then((x) => postMessage({ data: x, id, status: 'resolve' })) .catch((err) => postMessage({ data: err, id, status: 'reject' })); diff --git a/js/worker/renderWordCanvas.js b/js/worker/renderWordCanvas.js index 2da116c..98dbaf1 100644 --- a/js/worker/renderWordCanvas.js +++ b/js/worker/renderWordCanvas.js @@ -14,7 +14,7 @@ import { calcLineFontSize, calcWordMetrics } from '../utils/fontUtils.js'; * @param {Array} [ctxViewArr] */ export async function drawWordActual(ctx, words, imageBinaryBit, imgDims, angle, ctxViewArr) { - if (!FontCont.active) throw new Error('Fonts must be defined before running this function.'); + if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.'); if (!ctx) throw new Error('Canvases must be defined before running this function.'); // The font/style from the first word is used for the purposes of font metrics @@ -157,7 +157,7 @@ const printWordOnCanvas = async ({ * @param {boolean} [imageRotated=false] - */ export const drawWordRender = async (ctx, word, offsetX = 0, cropY = 0, ctxView = null, imageRotated = false) => { - if (!FontCont.active) throw new Error('Fonts must be defined before running this function.'); + if (!FontCont.raw) throw new Error('Fonts must be defined before running this function.'); if (!ctx) throw new Error('Canvases must be defined before running this function.'); const fontI = FontCont.getWordFont(word); diff --git a/scribe.js b/scribe.js index 62b9853..a308361 100644 --- a/scribe.js +++ b/scribe.js @@ -225,7 +225,7 @@ const clear = async () => { */ const terminate = async () => { clearData(); - await Promise.allSettled([gs.terminate(), ImageCache.terminate()]); + await Promise.allSettled([gs.terminate(), ImageCache.terminate(), FontCont.terminate()]); }; export default { diff --git a/tests/cli/cli.spec.js b/tests/cli/cli.spec.js index 4e92640..e617248 100644 --- a/tests/cli/cli.spec.js +++ b/tests/cli/cli.spec.js @@ -2,8 +2,8 @@ import { assert, expect } from 'chai'; import fs from 'fs'; import path from 'path'; import { fileURLToPath } from 'url'; -import { getRandomAlphanum } from '../../js/utils/miscUtils.js'; import { checkCLI, confCLI, overlayCLI } from '../../cli/cli.js'; +import { getRandomAlphanum } from '../../js/utils/miscUtils.js'; globalThis.__dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -61,6 +61,7 @@ describe('Check Node.js commands.', () => { }).timeout(10000); it('Should check contents of Abbyy .xml file.', async () => { + // CLI equivalent: node cli/scribe.js check tests/assets/scribe_test_pdf1.pdf tests/assets/scribe_test_pdf1_abbyy.xml // Call the function await checkCLI(path.join(__dirname, '../assets/scribe_test_pdf1.pdf'), path.join(__dirname, '../assets/scribe_test_pdf1_abbyy.xml')); diff --git a/tests/module/recognize.spec.js b/tests/module/recognize.spec.js index f3ce58e..00f0341 100644 --- a/tests/module/recognize.spec.js +++ b/tests/module/recognize.spec.js @@ -37,14 +37,14 @@ describe('Check recognition-related features.', function () { }); it('Font optimization improves overlap quality', async () => { - if (!scribe.data.debug.evalRaw) throw new Error('DebugData.evalRaw is not defined'); - if (!scribe.data.debug.evalOpt) throw new Error('DebugData.evalOpt is not defined'); - assert.isBelow(scribe.data.debug.evalOpt.sansMetrics.NimbusSans, scribe.data.debug.evalRaw.sansMetrics.NimbusSans); - assert.isBelow(scribe.data.debug.evalOpt.sansMetrics.NimbusSans, 0.45); + if (!scribe.data.font.rawMetrics) throw new Error('DebugData.evalRaw is not defined'); + if (!scribe.data.font.optMetrics) throw new Error('DebugData.evalOpt is not defined'); + assert.isBelow(scribe.data.font.optMetrics.NimbusSans, scribe.data.font.rawMetrics.NimbusSans); + assert.isBelow(scribe.data.font.optMetrics.NimbusSans, 0.45); }).timeout(10000); it('Font optimization should be enabled when it improves overlap quality', async () => { - assert.strictEqual(scribe.opt.enableOpt, true); + assert.strictEqual(scribe.data.font.enableOpt, true); }).timeout(10000); after(async () => {