Skip to content

Commit

Permalink
Refactored scheduler Node.js vs. browser code
Browse files Browse the repository at this point in the history
  • Loading branch information
Balearica committed Sep 6, 2024
1 parent 7e89107 commit 65e4f40
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 148 deletions.
4 changes: 2 additions & 2 deletions js/containers/imageContainer.js
Original file line number Diff line number Diff line change
Expand Up @@ -256,12 +256,12 @@ export class ImageCache {
// If no preference is specified for upscaling, default to false.
const upscaleArg = props?.upscaled || false;

const scheduler = await gs.getGeneralScheduler();
await gs.getGeneralScheduler();

const resPromise = (async () => {
// Wait for non-rotated version before replacing with promise
if (typeof process === 'undefined') await gs.initTesseract({ anyOk: true });
return scheduler.recognize({
return gs.recognize({
image: inputImage.src,
options: { rotateRadians: angleArg, upscale: upscaleArg },
output: {
Expand Down
24 changes: 5 additions & 19 deletions js/debug.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,25 +114,11 @@ export async function drawDebugImages(args) {
export async function renderPageStatic(page) {
const image = await ImageCache.getNative(page.n, { rotated: opt.autoRotate, upscaled: false });

// The Node.js canvas package does not currently support worker threads
// https://github.com/Automattic/node-canvas/issues/1394
let res;
if (!(typeof process === 'undefined')) {
const { renderPageStaticImp } = await import('./worker/compareOCRModule.js');
res = await renderPageStaticImp({
page,
image,
angle: pageMetricsArr[page.n].angle,
});
// Browser case
} else {
if (!gs.scheduler) throw new Error('GeneralScheduler must be defined before this function can run.');
res = await gs.scheduler.renderPageStaticImp({
page,
image,
angle: pageMetricsArr[page.n].angle,
});
}
const res = gs.renderPageStaticImp({
page,
image,
angle: pageMetricsArr[page.n].angle,
});

return res;
}
6 changes: 2 additions & 4 deletions js/fontContainerMain.js
Original file line number Diff line number Diff line change
Expand Up @@ -321,8 +321,6 @@ export function setDefaultFontAuto(fontMetricsObj) {
* @param {Object.<string, FontMetricsFamily>} fontMetricsObj
*/
export async function optimizeFontContainerFamily(fontFamily, fontMetricsObj) {
if (!gs.scheduler) throw new Error('GeneralScheduler must be defined before this function can run.');

// When we have metrics for individual fonts families, those are used to optimize the appropriate fonts.
// Otherwise, the "default" metric is applied to whatever font the user has selected as the default font.
const multiFontMode = checkMultiFontMode(fontMetricsObj);
Expand All @@ -342,7 +340,7 @@ export async function optimizeFontContainerFamily(fontFamily, fontMetricsObj) {
}

const metricsNormal = fontMetricsObj[fontMetricsType][fontFamily.normal.style];
const normalOptFont = gs.scheduler.optimizeFont({ fontData: fontFamily.normal.src, fontMetricsObj: metricsNormal, style: fontFamily.normal.style })
const normalOptFont = gs.optimizeFont({ fontData: fontFamily.normal.src, fontMetricsObj: metricsNormal, style: fontFamily.normal.style })
.then(async (x) => {
const font = await loadOpentype(x.fontData, x.kerningPairs);
return new FontContainerFont(fontFamily.normal.family, fontFamily.normal.style, x.fontData, true, font);
Expand All @@ -352,7 +350,7 @@ export async function optimizeFontContainerFamily(fontFamily, fontMetricsObj) {
/** @type {?FontContainerFont|Promise<FontContainerFont>} */
let italicOptFont = null;
if (metricsItalic && metricsItalic.obs >= 200) {
italicOptFont = gs.scheduler.optimizeFont({ fontData: fontFamily.italic.src, fontMetricsObj: metricsItalic, style: fontFamily.italic.style })
italicOptFont = gs.optimizeFont({ fontData: fontFamily.italic.src, fontMetricsObj: metricsItalic, style: fontFamily.italic.style })
.then(async (x) => {
const font = await loadOpentype(x.fontData, x.kerningPairs);
return new FontContainerFont(fontFamily.italic.family, fontFamily.italic.style, x.fontData, true, font);
Expand Down
32 changes: 7 additions & 25 deletions js/fontEval.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ import { gs } from './generalWorkerMain.js';
* @param {number} n - Number of words to compare
*/
export async function evalPagesFont(font, pageArr, opt, n = 500) {
if (!gs.scheduler) throw new Error('GeneralScheduler must be defined before this function can run.');

let metricTotal = 0;
let wordsTotal = 0;

Expand All @@ -26,29 +24,13 @@ export async function evalPagesFont(font, pageArr, opt, n = 500) {

const imageI = await ImageCache.getBinary(i);

// The Node.js canvas package does not currently support worker threads
// https://github.com/Automattic/node-canvas/issues/1394
let res;
if (!(typeof process === 'undefined')) {
const { evalPageFont } = await import('./worker/compareOCRModule.js');

res = await evalPageFont({
font,
page: pageArr[i],
binaryImage: imageI,
pageMetricsObj: pageMetricsArr[i],
opt,
});
// Browser case
} else {
res = await gs.scheduler.evalPageFont({
font,
page: pageArr[i],
binaryImage: imageI,
pageMetricsObj: pageMetricsArr[i],
opt,
});
}
const res = await gs.evalPageFont({
font,
page: pageArr[i],
binaryImage: imageI,
pageMetricsObj: pageMetricsArr[i],
opt,
});

metricTotal += res.metricTotal;
wordsTotal += res.wordsTotal;
Expand Down
178 changes: 98 additions & 80 deletions js/generalWorkerMain.js
Original file line number Diff line number Diff line change
Expand Up @@ -102,62 +102,6 @@ export async function initGeneralWorker() {
});
}

/**
 * Typed facade over a job scheduler.
 *
 * Every method defined in the constructor forwards its single `args` object to
 * `this.scheduler.addJob(<jobName>, args)` and resolves with whatever that call resolves with.
 * The methods are created as own properties of each instance (arrow functions), so they can be
 * passed around without losing their binding.
 */
export class GeneralScheduler {
  /**
   * @param scheduler - Object exposing `addJob(jobName, args)`. It is stored on `this.scheduler`
   *    and looked up again on every call, so reassigning `this.scheduler` later takes effect.
   */
  constructor(scheduler) {
    this.scheduler = scheduler;

    // Single dispatch point shared by all job methods below.
    // `this.scheduler` is intentionally read at call time rather than captured here.
    const submit = (jobName, jobArgs) => this.scheduler.addJob(jobName, jobArgs);

    /**
     * Queues a `compareOCRPageImp` job.
     * @param {Parameters<typeof import('./worker/compareOCRModule.js').compareOCRPageImp>[0]} args
     * @returns {ReturnType<typeof import('./worker/compareOCRModule.js').compareOCRPageImp>}
     */
    this.compareOCRPageImp = async (args) => {
      const result = await submit('compareOCRPageImp', args);
      return result;
    };

    /**
     * Queues an `optimizeFont` job.
     * @param {Parameters<typeof import('./worker/optimizeFontModule.js').optimizeFont>[0]} args
     * @returns {ReturnType<typeof import('./worker/optimizeFontModule.js').optimizeFont>}
     */
    this.optimizeFont = async (args) => {
      const result = await submit('optimizeFont', args);
      return result;
    };

    /**
     * Queues a `recognize` job.
     * @template {Partial<Tesseract.OutputFormats>} TO
     * @param {Object} args
     * @param {Parameters<Tesseract.Worker['recognize']>[0]} args.image
     * @param {Parameters<Tesseract.Worker['recognize']>[1]} args.options
     * @param {TO} args.output
     * @returns {Promise<Tesseract.Page<TO>>}
     */
    this.recognize = async (args) => {
      const result = await submit('recognize', args);
      return result;
    };

    /**
     * Queues a `recognizeAndConvert` job.
     * @param {Parameters<typeof import('./worker/generalWorker.js').recognizeAndConvert>[0]} args
     * @returns {ReturnType<typeof import('./worker/generalWorker.js').recognizeAndConvert>}
     */
    this.recognizeAndConvert = async (args) => {
      const result = await submit('recognizeAndConvert', args);
      return result;
    };

    /**
     * Queues a `recognizeAndConvert2` job.
     * @param {Parameters<typeof import('./worker/generalWorker.js').recognizeAndConvert2>[0]} args
     * @returns {Promise<[ReturnType<typeof import('./worker/generalWorker.js').recognizeAndConvert>, ReturnType<typeof import('./worker/generalWorker.js').recognizeAndConvert>]>}
     */
    this.recognizeAndConvert2 = async (args) => {
      const result = await submit('recognizeAndConvert2', args);
      return result;
    };

    /**
     * Queues an `evalPageBase` job.
     * @param {Parameters<typeof import('./worker/compareOCRModule.js').evalPageBase>[0]} args
     * @returns {ReturnType<typeof import('./worker/compareOCRModule.js').evalPageBase>}
     */
    this.evalPageBase = async (args) => {
      const result = await submit('evalPageBase', args);
      return result;
    };

    /**
     * Queues an `evalWords` job.
     * @param {Parameters<typeof import('./worker/compareOCRModule.js').evalWords>[0]} args
     * @returns {ReturnType<typeof import('./worker/compareOCRModule.js').evalWords>}
     */
    this.evalWords = async (args) => {
      const result = await submit('evalWords', args);
      return result;
    };

    /**
     * Queues an `evalPageFont` job.
     * @param {Parameters<typeof import('./worker/compareOCRModule.js').evalPageFont>[0]} args
     * @returns {ReturnType<typeof import('./worker/compareOCRModule.js').evalPageFont>}
     */
    this.evalPageFont = async (args) => {
      const result = await submit('evalPageFont', args);
      return result;
    };

    /**
     * Queues a `renderPageStaticImp` job.
     * @param {Parameters<typeof import('./worker/compareOCRModule.js').renderPageStaticImp>[0]} args
     * @returns {ReturnType<typeof import('./worker/compareOCRModule.js').renderPageStaticImp>}
     */
    this.renderPageStaticImp = async (args) => {
      const result = await submit('renderPageStaticImp', args);
      return result;
    };
  }
}

/**
* This class stores the scheduler and related promises.
*/
Expand All @@ -177,37 +121,112 @@ export class gs {
static loadedBuiltInOptWorker = false;

/** @type {?GeneralScheduler} */
static scheduler = null;
// static scheduler = null;

/** @type {?import('../tess/tesseract.esm.min.js').default} */
static schedulerInner = null;

/** @type {?Function} */
static resReady = null;
static #resReady = null;

/** @type {?Promise<void>} */
static schedulerReady = null;

static setSchedulerReady = () => {
gs.schedulerReady = new Promise((resolve, reject) => {
gs.resReady = resolve;
});
};

/** @type {?Function} */
static resReadyTesseract = null;
static #resReadyTesseract = null;

/** @type {?Promise<void>} */
static schedulerReadyTesseract = null;

static setSchedulerReadyTesseract = () => {
gs.schedulerReadyTesseract = new Promise((resolve, reject) => {
gs.resReadyTesseract = resolve;
});
/**
* @param {Parameters<typeof import('./worker/compareOCRModule.js').compareOCRPageImp>[0]} args
* @returns {ReturnType<typeof import('./worker/compareOCRModule.js').compareOCRPageImp>}
*/
static compareOCRPageImp = async (args) => {
if (typeof process === 'undefined') {
return await gs.schedulerInner.addJob('compareOCRPageImp', args);
// eslint-disable-next-line no-else-return
} else {
// The Node.js canvas package does not currently support worker threads
// https://github.com/Automattic/node-canvas/issues/1394
const compareOCRPageImp = (await import('./worker/compareOCRModule.js')).compareOCRPageImp;
return await compareOCRPageImp(args);
}
};

/**
* @param {Parameters<typeof import('./worker/optimizeFontModule.js').optimizeFont>[0]} args
* @returns {ReturnType<typeof import('./worker/optimizeFontModule.js').optimizeFont>}
*/
static optimizeFont = async (args) => (await gs.schedulerInner.addJob('optimizeFont', args));

/**
* @template {Partial<Tesseract.OutputFormats>} TO
* @param {Object} args
* @param {Parameters<Tesseract.Worker['recognize']>[0]} args.image
* @param {Parameters<Tesseract.Worker['recognize']>[1]} args.options
* @param {TO} args.output
* @returns {Promise<Tesseract.Page<TO>>}
* Exported for type inference purposes, should not be imported anywhere.
*/
static recognize = async (args) => (await gs.schedulerInner.addJob('recognize', args));

/**
* @param {Parameters<typeof import('./worker/generalWorker.js').recognizeAndConvert>[0]} args
* @returns {ReturnType<typeof import('./worker/generalWorker.js').recognizeAndConvert>}
*/
static recognizeAndConvert = async (args) => (await gs.schedulerInner.addJob('recognizeAndConvert', args));

/**
* @param {Parameters<typeof import('./worker/generalWorker.js').recognizeAndConvert2>[0]} args
* @returns {Promise<[ReturnType<typeof import('./worker/generalWorker.js').recognizeAndConvert>, ReturnType<typeof import('./worker/generalWorker.js').recognizeAndConvert>]>}
*/
static recognizeAndConvert2 = async (args) => (await gs.schedulerInner.addJob('recognizeAndConvert2', args));

/**
* @param {Parameters<typeof import('./worker/compareOCRModule.js').evalPageBase>[0]} args
* @returns {ReturnType<typeof import('./worker/compareOCRModule.js').evalPageBase>}
*/
static evalPageBase = async (args) => {
if (typeof process === 'undefined') {
return await gs.schedulerInner.addJob('evalPageBase', args);
// eslint-disable-next-line no-else-return
} else {
const evalPageBase = (await import('./worker/compareOCRModule.js')).evalPageBase;
return await evalPageBase(args);
}
};

/**
* @param {Parameters<typeof import('./worker/compareOCRModule.js').evalWords>[0]} args
* @returns {ReturnType<typeof import('./worker/compareOCRModule.js').evalWords>}
*/
static evalWords = async (args) => (await gs.schedulerInner.addJob('evalWords', args));

/**
* @param {Parameters<typeof import('./worker/compareOCRModule.js').evalPageFont>[0]} args
* @returns {ReturnType<typeof import('./worker/compareOCRModule.js').evalPageFont>}
*/
static evalPageFont = async (args) => {
if (typeof process === 'undefined') {
return await gs.schedulerInner.addJob('evalPageFont', args);
// eslint-disable-next-line no-else-return
} else {
const evalPageFont = (await import('./worker/compareOCRModule.js')).evalPageFont;
return await evalPageFont(args);
}
};

/**
* @param {Parameters<typeof import('./worker/compareOCRModule.js').renderPageStaticImp>[0]} args
* @returns {ReturnType<typeof import('./worker/compareOCRModule.js').renderPageStaticImp>}
*/
static renderPageStaticImp = async (args) => (await gs.schedulerInner.addJob('renderPageStaticImp', args));

static init = async () => {
gs.setSchedulerReady();
gs.schedulerReady = new Promise((resolve, reject) => {
gs.#resReady = resolve;
});

// Determine number of workers to use in the browser.
// This is the minimum of:
Expand Down Expand Up @@ -240,10 +259,8 @@ export class gs {

await Promise.all(resArr);

gs.scheduler = new GeneralScheduler(gs.schedulerInner);

// @ts-ignore
gs.resReady(true);
gs.#resReady(true);
};

/**
Expand All @@ -263,7 +280,9 @@ export class gs {

if (gs.schedulerReadyTesseract) await gs.schedulerReadyTesseract;

gs.setSchedulerReadyTesseract();
gs.schedulerReadyTesseract = new Promise((resolve, reject) => {
gs.#resReadyTesseract = resolve;
});

// Wait for the first worker to load.
// A behavior (likely bug) was observed where, if the workers are loaded in parallel,
Expand All @@ -276,7 +295,7 @@ export class gs {
await Promise.allSettled(resArr);
}
// @ts-ignore
gs.resReadyTesseract(true);
gs.#resReadyTesseract(true);
return gs.schedulerReadyTesseract;
};

Expand All @@ -286,12 +305,12 @@ export class gs {
static getGeneralScheduler = async () => {
if (gs.schedulerReady) {
await gs.schedulerReady;
return /** @type {GeneralScheduler} */ (gs.scheduler);
return;
}

await gs.init();

return /** @type {GeneralScheduler} */ (gs.scheduler);
return;
};

static clear = () => {
Expand All @@ -300,12 +319,11 @@ export class gs {

static terminate = async () => {
gs.clear();
gs.scheduler = null;
await gs.schedulerInner.terminate();
gs.schedulerInner = null;
gs.resReady = null;
gs.#resReady = null;
gs.schedulerReady = null;
gs.resReadyTesseract = null;
gs.#resReadyTesseract = null;
gs.schedulerReadyTesseract = null;
gs.loadedBuiltInRawWorker = false;
};
Expand Down
Loading

0 comments on commit 65e4f40

Please sign in to comment.