Skip to content

Commit

Permalink
draft
Browse files Browse the repository at this point in the history
  • Loading branch information
andiwand committed Sep 16, 2024
1 parent 04bb3d5 commit 282ea94
Show file tree
Hide file tree
Showing 8 changed files with 175 additions and 13 deletions.
33 changes: 25 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ set(ODR_SOURCE_FILES
"src/odr/internal/html/html_writer.cpp"
"src/odr/internal/html/image_file.cpp"
"src/odr/internal/html/pdf_file.cpp"
"src/odr/internal/html/pdf_poppler_file.hpp"
"src/odr/internal/html/text_file.cpp"

"src/odr/internal/json/json_file.cpp"
Expand Down Expand Up @@ -194,16 +195,32 @@ target_link_libraries(odr
utf8::cpp
)

if(WITH_PDF2HTMLEX)
target_sources(odr PRIVATE "src/odr/internal/html/pdf2htmlEX_wrapper.cpp")
if (WITH_PDF2HTMLEX)
find_package(pdf2htmlEX REQUIRED)
target_link_libraries(odr PRIVATE pdf2htmlex::pdf2htmlex)
endif(WITH_PDF2HTMLEX)
if(WITH_WVWARE)
target_sources(odr PRIVATE "src/odr/internal/html/wvWare_wrapper.cpp")
find_package(poppler REQUIRED)
target_sources(odr
PRIVATE
"src/odr/internal/html/pdf2htmlEX_wrapper.cpp"
"src/odr/internal/html/pdf_poppler_file.cpp"
"src/odr/internal/pdf_poppler/poppler_pdf_file.cpp"
)
target_link_libraries(odr
PRIVATE
pdf2htmlex::pdf2htmlex
poppler::poppler
)
endif ()
if (WITH_WVWARE)
find_package(wvware REQUIRED)
target_link_libraries(odr PRIVATE wvware::wvware)
endif(WITH_WVWARE)
target_sources(odr
PRIVATE
"src/odr/internal/html/wvWare_wrapper.cpp"
)
target_link_libraries(odr
PRIVATE
wvware::wvware
)
endif ()

if (EXISTS "${PROJECT_SOURCE_DIR}/.git")
add_dependencies(odr check_git)
Expand Down
2 changes: 1 addition & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def requirements(self):
self.requires("uchardet/0.0.8")
self.requires("utfcpp/4.0.4")
if self.options.get_safe("with_pdf2htmlEX", False):
self.requires("pdf2htmlex/0.18.8.rc1-20240905-git")
self.requires("pdf2htmlex/0.18.8.rc1-20240905-git", transitive_headers=True, transitive_libs=True)
if self.options.get_safe("with_wvWare", False):
self.requires("wvware/1.2.9")

Expand Down
2 changes: 1 addition & 1 deletion src/odr/file.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ enum class FileLocation {
/// @brief Collection of decoder engines.
enum class DecoderEngine {
odr,
pdf2html_ex,
poppler,
wv_ware,
};

Expand Down
6 changes: 3 additions & 3 deletions src/odr/internal/html/pdf_file.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef ODR_INTERNAL_PDF_FILE_HPP
#define ODR_INTERNAL_PDF_FILE_HPP
#ifndef ODR_INTERNAL_HTML_PDF_FILE_HPP
#define ODR_INTERNAL_HTML_PDF_FILE_HPP

#include <string>

Expand All @@ -17,4 +17,4 @@ Html translate_pdf_file(const PdfFile &pdf_file, const std::string &output_path,

}

#endif // ODR_INTERNAL_PDF_FILE_HPP
#endif // ODR_INTERNAL_HTML_PDF_FILE_HPP
58 changes: 58 additions & 0 deletions src/odr/internal/html/pdf_poppler_file.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#include <odr/internal/html/pdf_poppler_file.hpp>

#include <odr/exceptions.hpp>
#include <odr/file.hpp>
#include <odr/html.hpp>

#include <odr/internal/common/file.hpp>
#include <odr/internal/pdf_poppler/poppler_pdf_file.hpp>
#include <odr/internal/project_info.hpp>

#include <pdf2htmlEX.h>

#include <cstring>

namespace odr::internal {

/// @brief Runs pdf2htmlEX and describes the HTML document it generates.
///
/// Before converting, `FONTCONFIG_PATH` is exported into the process
/// environment from the compile-time `FONTCONFIG_PATH` macro unless the
/// variable is already set. The converter is configured to write
/// `document.html` into `output_path`.
///
/// NOTE(review): `pdf_file` is not read anywhere in this function, so no input
/// document is handed to the converter here - confirm whether a call that sets
/// the input file is still missing.
///
/// @param pdf_file    PDF to translate (currently unused, see note above).
/// @param output_path Directory handed to pdf2htmlEX as destination directory.
/// @param config      HTML configuration forwarded into the returned `Html`.
/// @return `Html` of type `FileType::portable_document_format` holding a
///         single entry `{"document", output_path + "/document.html"}`.
/// @throws WrongPassword      if pdf2htmlEX reports an encryption password
///                            error.
/// @throws std::runtime_error if the document is copy protected or the
///                            conversion fails.
Html html::translate_pdf_poppler_file(const PopplerPdfFile &pdf_file,
                                      const std::string &output_path,
                                      const HtmlConfig &config) {
  static const char *fontconfig_path = getenv("FONTCONFIG_PATH");
  if (nullptr == fontconfig_path) {
    // putenv keeps the pointer it is given instead of copying the string, so
    // after a successful call the storage is intentionally never freed.
    char *storage = strdup("FONTCONFIG_PATH=" FONTCONFIG_PATH);
    // strdup returns nullptr when out of memory; never pass that to putenv.
    if (nullptr != storage && 0 != putenv(storage)) {
      free(storage);
    }
    // Re-read so later calls skip this block once the variable is set.
    fontconfig_path = getenv("FONTCONFIG_PATH");
  }

  pdf2htmlEX::pdf2htmlEX pdf2htmlEX;
  pdf2htmlEX.setDataDir(PDF2HTMLEX_DATA_DIR);
  pdf2htmlEX.setPopplerDataDir(POPPLER_DATA_DIR);

  pdf2htmlEX.setDestinationDir(output_path);
  auto output_file_name = "document.html";
  pdf2htmlEX.setOutputFilename(output_file_name);

  pdf2htmlEX.setDRM(false);
  pdf2htmlEX.setProcessOutline(false);
  pdf2htmlEX.setProcessAnnotation(true);

  // Map pdf2htmlEX failures onto the exceptions callers of this library see.
  try {
    pdf2htmlEX.convert();
  } catch (const pdf2htmlEX::EncryptionPasswordException &) {
    throw WrongPassword();
  } catch (const pdf2htmlEX::DocumentCopyProtectedException &) {
    throw std::runtime_error("document is copy protected");
  } catch (const pdf2htmlEX::ConversionFailedException &e) {
    throw std::runtime_error(std::string("conversion error ") + e.what());
  }

  return {FileType::portable_document_format,
          config,
          {{"document", output_path + "/" + output_file_name}}};
}

} // namespace odr::internal
21 changes: 21 additions & 0 deletions src/odr/internal/html/pdf_poppler_file.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#ifndef ODR_INTERNAL_HTML_PDF_POPPLER_FILE_HPP
#define ODR_INTERNAL_HTML_PDF_POPPLER_FILE_HPP

#include <string>

namespace odr {
// Forward declaration of the PDF file type taken by
// translate_pdf_poppler_file.
// NOTE(review): poppler_pdf_file.hpp defines PopplerPdfFile inside
// odr::internal::poppler_pdf, whereas this declares odr::PopplerPdfFile -
// confirm the two are meant to be the same type.
class PopplerPdfFile;

// Forward declarations of the HTML types used in the signature of
// translate_pdf_poppler_file.
struct HtmlConfig;
class Html;
} // namespace odr

namespace odr::internal::html {

/// @brief Translates a PDF file to HTML using pdf2htmlEX.
///
/// @param pdf_file    PDF file to translate.
/// @param output_path Directory the converter uses as destination directory.
/// @param config      HTML configuration stored in the returned `Html`.
/// @return `Html` describing the generated `document.html` inside
///         `output_path`.
/// @throws WrongPassword      if the converter reports a password error.
/// @throws std::runtime_error if the document is copy protected or the
///                            conversion fails.
Html translate_pdf_poppler_file(const PopplerPdfFile &pdf_file,
const std::string &output_path,
const HtmlConfig &config);

}

#endif // ODR_INTERNAL_HTML_PDF_POPPLER_FILE_HPP
36 changes: 36 additions & 0 deletions src/odr/internal/pdf_poppler/poppler_pdf_file.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#include <odr/internal/pdf_poppler/poppler_pdf_file.hpp>

#include <odr/internal/common/path.hpp>

#include <poppler/PDFDocFactory.h>
#include <poppler/goo/GooString.h>

namespace odr::internal::poppler_pdf {

/// @brief Opens the PDF stored at the disk path of `file` with poppler.
///
/// @param file Disk-backed file; its `disk_path()` is handed to
///             `PDFDocFactory::createPDFDoc`. Ownership is shared with this
///             object through `m_file`.
PopplerPdfFile::PopplerPdfFile(std::shared_ptr<common::DiskFile> file) {
  // Read the path while `file` is still valid. Moving `file` into `m_file` in
  // the initializer list (as done previously) left `file` empty, so the
  // dereference below was a null pointer access.
  GooString file_path(file->disk_path()->string().c_str());
  m_pdf_doc = std::unique_ptr<PDFDoc>(PDFDocFactory().createPDFDoc(file_path));

  // Hand over ownership only after the last use of `file`.
  m_file = std::move(file);
}

/// @brief Reports the category of this file, which is always a document.
FileCategory PopplerPdfFile::file_category() const noexcept {
  const auto category = FileCategory::document;
  return category;
}

/// @brief Returns a shared handle to the file stored by the constructor.
std::shared_ptr<abstract::File> PopplerPdfFile::file() const noexcept {
  std::shared_ptr<abstract::File> underlying = m_file;
  return underlying;
}

/// @brief Reports the file type, which is always the portable document format.
FileType PopplerPdfFile::file_type() const noexcept {
  const auto type = FileType::portable_document_format;
  return type;
}

/// @brief Returns a value-initialized `FileMeta`; no metadata is read from the
///        document.
FileMeta PopplerPdfFile::file_meta() const noexcept {
  return FileMeta{};
}

/// @brief Identifies poppler as the engine that decoded this file.
DecoderEngine PopplerPdfFile::decoder_engine() const noexcept {
  const auto engine = DecoderEngine::poppler;
  return engine;
}

/// @brief Gives read-only access to the poppler document created by the
///        constructor.
const PDFDoc &PopplerPdfFile::pdf_doc() const {
  const PDFDoc &document = *m_pdf_doc;
  return document;
}

} // namespace odr::internal::poppler_pdf
30 changes: 30 additions & 0 deletions src/odr/internal/pdf_poppler/poppler_pdf_file.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#ifndef ODR_INTERNAL_POPPLER_PDF_FILE_HPP
#define ODR_INTERNAL_POPPLER_PDF_FILE_HPP

#include <odr/internal/common/file.hpp>

class PDFDoc;

namespace odr::internal::poppler_pdf {

/// @brief Decoded PDF file backed by a poppler `PDFDoc`.
///
/// Keeps the originating file alive alongside the poppler document that is
/// created from its disk path in the constructor.
class PopplerPdfFile : public abstract::DecodedFile {
public:
/// @brief Opens the PDF located at the disk path of `file` with poppler.
explicit PopplerPdfFile(std::shared_ptr<common::DiskFile> file);

/// @brief Returns the file handed to the constructor.
[[nodiscard]] std::shared_ptr<abstract::File> file() const noexcept final;

/// @brief Always `FileType::portable_document_format`.
[[nodiscard]] FileType file_type() const noexcept final;
/// @brief Always `FileCategory::document`.
[[nodiscard]] FileCategory file_category() const noexcept final;
/// @brief Returns a value-initialized `FileMeta`.
[[nodiscard]] FileMeta file_meta() const noexcept final;
/// @brief Always `DecoderEngine::poppler`.
[[nodiscard]] DecoderEngine decoder_engine() const noexcept final;

/// @brief Read-only access to the underlying poppler document.
[[nodiscard]] const PDFDoc &pdf_doc() const;

private:
// File the document was opened from.
std::shared_ptr<abstract::File> m_file;
// Poppler document created in the constructor.
// NOTE(review): PDFDoc is only forward-declared in this header and the
// destructor of this class is implicit; std::unique_ptr<PDFDoc> requires the
// complete type wherever that destructor is instantiated - consider declaring
// the destructor here and defining it in the .cpp.
std::unique_ptr<PDFDoc> m_pdf_doc;
};

} // namespace odr::internal::poppler_pdf

#endif // ODR_INTERNAL_POPPLER_PDF_FILE_HPP

0 comments on commit 282ea94

Please sign in to comment.