Skip to content

Commit

Permalink
Added 'extractPDFTextImage' import option
Browse files: browse the repository at this point in the history
  • Loading branch information
Balearica committed Aug 22, 2024
1 parent 8e0a1e4 commit 76592a7
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion js/import/import.js
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ export function sortInputFiles(files) {
* @param {Object} [options]
* @param {boolean} [options.extractPDFTextNative=false] - Extract text from text-native PDF documents.
* @param {boolean} [options.extractPDFTextOCR=false] - Extract text from image-native PDF documents with existing OCR text layers.
* @param {boolean} [options.extractPDFTextImage=false] - Extract text from image-native PDF documents with no existing OCR layer.
* This option exists because documents may still contain some text even if they are determined to be image-native (for example, scanned documents with a text-native header).
* @returns
*/
export async function importFiles(files, options = {}) {
Expand All @@ -199,6 +201,7 @@ export async function importFiles(files, options = {}) {

const extractPDFTextNative = options?.extractPDFTextNative ?? false;
const extractPDFTextOCR = options?.extractPDFTextOCR ?? false;
const extractPDFTextImage = options?.extractPDFTextImage ?? false;

/** @type {Array<File|FileNode|ArrayBuffer>} */
let pdfFiles = [];
Expand Down Expand Up @@ -440,7 +443,7 @@ export async function importFiles(files, options = {}) {
}
});
} else if (extractPDFTextNative || extractPDFTextOCR) {
await extractInternalPDFText({ setActive: true, extractPDFTextNative, extractPDFTextOCR });
await extractInternalPDFText({ setActive: true, extractPDFTextNative, extractPDFTextOCR, extractPDFTextImage });
}
}

Expand Down

0 comments on commit 76592a7

Please sign in to comment.