Skip to content

Commit

Permalink
Reworked font storage and optimization to improve performance
Browse files Browse the repository at this point in the history
  • Loading branch information
Balearica committed Sep 2, 2024
1 parent f19963a commit 2977fd5
Show file tree
Hide file tree
Showing 15 changed files with 285 additions and 299 deletions.
2 changes: 0 additions & 2 deletions js/containers/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ export class opt {

static extractText = false;

static enableOpt = false;

static enableUpscale = false;

static ignorePunct = false;
Expand Down
6 changes: 0 additions & 6 deletions js/containers/dataContainer.js
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,6 @@ export const pageMetricsArr = [];
export class DebugData {
/** @type {{[key: string]: Array<Array<CompDebugBrowser|CompDebugNode>> | undefined}} */
static debugImg = {};

/** @type {?Awaited<ReturnType<import('../fontEval.js').evaluateFonts>>} */
static evalRaw;

/** @type {?Awaited<ReturnType<import('../fontEval.js').evaluateFonts>>} */
static evalOpt;
}

/** @type {Array<Awaited<ReturnType<typeof import('../../scrollview-web/scrollview/ScrollView.js').ScrollView.prototype.getAll>>>} */
Expand Down
99 changes: 65 additions & 34 deletions js/containers/fontContainer.js
Original file line number Diff line number Diff line change
Expand Up @@ -225,15 +225,9 @@ export class FontCont {
/** @type {?FontContainer} */
static raw = null;

/** @type {?FontContainer} */
static optInitial = null;

/** @type {?FontContainer} */
static opt = null;

/** @type {?FontContainer} */
static active = null;

/** @type {?FontContainer} */
static export = null;

Expand All @@ -242,30 +236,56 @@ export class FontCont {
chi_sim: null,
};

/** Optimized fonts will be used when believed to improve quality. */
static enableOpt = false;

/** Optimized fonts will always be used when they exist, even if believed to reduce quality. */
static forceOpt = false;

/** @type {?Awaited<ReturnType<import('../fontEval.js').evaluateFonts>>} */
static rawMetrics = null;

/** @type {?Awaited<ReturnType<import('../fontEval.js').evaluateFonts>>} */
static optMetrics = null;

static defaultFontName = 'SerifDefault';

static serifDefaultName = 'NimbusRomNo9L';

static sansDefaultName = 'NimbusSans';

static loadedBuiltInRawWorker = false;

static loadedBuiltInOptWorker = false;

/** @type {?('latin'|'all')} */
static glyphSet = null;

/**
* Get raw/opt/active font, and throw exception if it does not exist.
* This method only exists for type inference purposes, as raw/opt/active may be accessed directly, but may be `null`.
* This method should therefore only be used in cases where an exception on `null` is a desirable behavior.
* @param {('raw'|'opt'|'active'|'optInitial')} container
* @returns {FontContainer}
* Decide whether to use the optimized version of a font family.
* Note that even when this function returns `true`, an optimized version may not exist for every style of the family.
* @param {string} family - Font family name.
*/
static getContainer = (container) => {
const fontRes = FontCont[container];
if (!fontRes) throw new Error(`${container} font container does not exist.`);
return fontRes;
static useOptFamily = (family) => {
const raw = FontCont.raw?.[family]?.normal;
if (!raw) return false;
const opt = FontCont.opt?.[family]?.normal;
if (opt && FontCont.forceOpt) {
return true;
// If optimized fonts are enabled (but not forced), the optimized version of a font will be used if:
// (1) The optimized version exists
// (2) The optimized version has a better metric (so quality should improve).
// (3) The optimized version of the default sans/serif font also has a better metric.
// This last condition avoids font optimization being enabled in the UI when it only improves an unused font.
} if (opt && FontCont.enableOpt) {
const defaultFamily = raw.type === 'serif' ? FontCont.serifDefaultName : FontCont.sansDefaultName;

const rawMetricDefault = FontCont.rawMetrics?.[defaultFamily];
const optMetricDefault = FontCont.optMetrics?.[defaultFamily];

const rawMetric = FontCont.rawMetrics?.[family];
const optMetric = FontCont.optMetrics?.[family];
if (rawMetric && optMetric && optMetric < rawMetric && optMetricDefault < rawMetricDefault) {
return true;
}
}
return false;
};

/**
Expand All @@ -275,20 +295,19 @@ export class FontCont {
* @param {('Default'|'SansDefault'|'SerifDefault'|string)} family - Font family name.
* @param {('normal'|'italic'|'bold'|string)} [style='normal']
* @param {string} [lang='eng']
* @param {('raw'|'opt'|'active'|'optInitial')} [container='active']
* @returns {FontContainerFont}
*/
static getFont = (family, style = 'normal', lang = 'eng', container = 'active') => {
const fontCont = FontCont.getContainer(container);

static getFont = (family, style = 'normal', lang = 'eng') => {
if (lang === 'chi_sim') {
if (!FontCont.supp.chi_sim) throw new Error('chi_sim font does not exist.');
return FontCont.supp.chi_sim;
}

if (!FontCont.raw) throw new Error('Raw fonts not yet initialized.');

// Option 1: If we have access to the font, use it.
// Option 2: If we do not have access to the font, but it closely resembles a built-in font, use the built-in font.
if (!fontCont?.[family]?.[style]) {
if (!FontCont.raw?.[family]?.[style]) {
if (/Times/i.test(family)) {
family = 'NimbusRomNo9L';
} else if (/Helvetica/i.test(family)) {
Expand All @@ -309,7 +328,7 @@ export class FontCont {
}

// Option 3: If the font still is not identified, use the default sans/serif font.
if (!fontCont?.[family]?.[style]) {
if (!FontCont.raw?.[family]?.[style]) {
family = determineSansSerif(family);
}

Expand All @@ -318,31 +337,43 @@ export class FontCont {

if (family === 'SerifDefault') family = FontCont.serifDefaultName;
if (family === 'SansDefault') family = FontCont.sansDefaultName;
const fontRes = fontCont[family][style];

/** @type {FontContainerFont} */
let fontRes = FontCont.raw?.[family]?.[style];
if (!fontRes) throw new Error(`Font container does not contain ${family} (${style}).`);

const opt = FontCont.opt?.[family]?.[style];
const useOpt = FontCont.useOptFamily(family);
if (opt && useOpt) fontRes = opt;

return fontRes;
};

/**
*
* @param {OcrWord} word
* @param {('raw'|'opt'|'active'|'optInitial')} [container='active']
*/
static getWordFont = (word, container = 'active') => {
static getWordFont = (word) => {
const wordFontFamily = word.font || FontCont.defaultFontName;
return FontCont.getFont(wordFontFamily, word.style, word.lang, container);
return FontCont.getFont(wordFontFamily, word.style, word.lang);
};

/**
* Reset font container to original state but do not unload default resources.
*/
static clear = () => {
// Point `active` back at the raw container; `FontCont.raw` itself is not modified by this method.
FontCont.active = FontCont.raw;
// Discard the optimized font containers.
FontCont.optInitial = null;
FontCont.opt = null;
// Reset the built-in-font worker flags to their initial `false` state.
// NOTE(review): presumably this makes built-in fonts get re-sent to workers on next use — confirm against the loader.
FontCont.loadedBuiltInRawWorker = false;
FontCont.loadedBuiltInOptWorker = false;
FontCont.glyphSet = null;
// Drop the font evaluation metrics that `useOptFamily` compares when deciding raw vs. optimized.
FontCont.rawMetrics = null;
FontCont.optMetrics = null;

// Restore the default family names to the same values the static fields are initialized with.
FontCont.defaultFontName = 'SerifDefault';
FontCont.serifDefaultName = 'NimbusRomNo9L';
FontCont.sansDefaultName = 'NimbusSans';
};

// Full teardown: performs everything `clear` does and additionally releases the raw font container.
static terminate = () => {
FontCont.clear();
// Unlike `clear`, this also drops the raw fonts.
FontCont.raw = null;
// `clear` already sets `glyphSet` to null; this assignment repeats that reset.
FontCont.glyphSet = null;
};
}
13 changes: 9 additions & 4 deletions js/export/exportPDF.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@ import ocr from '../objects/ocrObjects.js';
*/
export async function hocrToPDF(hocrArr, minpage = 0, maxpage = -1, textMode = 'ebook', rotateText = false, rotateBackground = false,
dimsLimit = { width: -1, height: -1 }, confThreshHigh = 85, confThreshMed = 75, proofOpacity = 0.8) {
// TODO: Currently, all fonts are added to the PDF, and mupdf removes the unused fonts.
// It would likely be more performant to only add the fonts that are actually used up front.
const exportFontObj = FontCont.getContainer('active');
if (!FontCont.raw) throw new Error('No fonts loaded.');

if (maxpage === -1) {
maxpage = hocrArr.length - 1;
Expand All @@ -47,13 +45,20 @@ export async function hocrToPDF(hocrArr, minpage = 0, maxpage = -1, textMode = '
// Add fonts
// All fonts are added at this step.
// The fonts that are not used will be removed by muPDF later.
// TODO: It would likely be more performant to only add the fonts that are actually used up front.
let fontI = 0;
let objectI = 3;
const pdfFonts = {};
/** @type {Array<string>} */
const pdfFontObjStrArr = [];
let pdfFontsStr = '';
for (const [familyKey, familyObj] of Object.entries(exportFontObj)) {
for (const familyKey of Object.keys(FontCont.raw)) {
const useOpt = FontCont.useOptFamily(familyKey);
const familyObj = {
normal: useOpt && FontCont.opt?.[familyKey]?.normal ? FontCont.opt[familyKey].normal : FontCont.raw[familyKey].normal,
italic: useOpt && FontCont.opt?.[familyKey]?.italic ? FontCont.opt[familyKey].italic : FontCont.raw[familyKey].italic,
bold: useOpt && FontCont.opt?.[familyKey]?.bold ? FontCont.opt[familyKey].bold : FontCont.raw[familyKey].bold,
};
pdfFonts[familyKey] = {};
for (const [key, value] of Object.entries(familyObj)) {
const font = await value.opentype;
Expand Down
Loading

0 comments on commit 2977fd5

Please sign in to comment.