Skip to content

Commit

Permalink
feat: Decoder engines (#387)
Browse files Browse the repository at this point in the history
  • Loading branch information
andiwand authored Dec 26, 2024
1 parent b668837 commit 9ca86c4
Show file tree
Hide file tree
Showing 66 changed files with 2,222 additions and 535 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/build_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ jobs:
-DCMAKE_CXX_FLAGS="-Werror"
-DCMAKE_INSTALL_PREFIX=install
-DODR_TEST=ON
-DWITH_PDF2HTMLEX=ON
-DWITH_WVWARE=ON
- name: cmake
if: runner.os == 'Windows'
Expand All @@ -82,6 +84,8 @@ jobs:
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX=install
-DODR_TEST=ON
-DWITH_PDF2HTMLEX=OFF
-DWITH_WVWARE=OFF
- name: build
run: cmake --build build --config Release
Expand Down
41 changes: 33 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ set(ODR_SOURCE_FILES
"src/odr/internal/html/html_writer.cpp"
"src/odr/internal/html/image_file.cpp"
"src/odr/internal/html/pdf_file.cpp"
"src/odr/internal/html/pdf2htmlex_wrapper.hpp"
"src/odr/internal/html/text_file.cpp"

"src/odr/internal/json/json_file.cpp"
Expand Down Expand Up @@ -194,16 +195,40 @@ target_link_libraries(odr
utf8::cpp
)

if(WITH_PDF2HTMLEX)
target_sources(odr PRIVATE "src/odr/internal/html/pdf2htmlEX_wrapper.cpp")
if (WITH_PDF2HTMLEX)
find_package(pdf2htmlEX REQUIRED)
target_link_libraries(odr PRIVATE pdf2htmlex::pdf2htmlex)
endif(WITH_PDF2HTMLEX)
if(WITH_WVWARE)
target_sources(odr PRIVATE "src/odr/internal/html/wvWare_wrapper.cpp")
find_package(poppler REQUIRED)
target_sources(odr
PRIVATE
"src/odr/internal/html/pdf2htmlex_wrapper.cpp"
"src/odr/internal/pdf_poppler/poppler_pdf_file.cpp"
)
target_link_libraries(odr
PRIVATE
pdf2htmlex::pdf2htmlex
poppler::poppler
)
target_compile_definitions(odr
PRIVATE
ODR_WITH_PDF2HTMLEX
)
endif ()
if (WITH_WVWARE)
find_package(wvware REQUIRED)
target_link_libraries(odr PRIVATE wvware::wvware)
endif(WITH_WVWARE)
target_sources(odr
PRIVATE
"src/odr/internal/html/wvware_wrapper.cpp"
"src/odr/internal/oldms_wvware/wvware_oldms_file.cpp"
)
target_link_libraries(odr
PRIVATE
wvware::wvware
)
target_compile_definitions(odr
PRIVATE
ODR_WITH_WVWARE
)
endif ()

if (EXISTS "${PROJECT_SOURCE_DIR}/.git")
add_dependencies(odr check_git)
Expand Down
6 changes: 3 additions & 3 deletions scripts/conan
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env bash

conan install . --output-folder=cmake-build-relwithdebinfo --build=missing -s build_type=Release -s "&:build_type=RelWithDebInfo"
conan install . --output-folder=cmake-build-debug --build=missing -s build_type=Release -s "&:build_type=Debug"
conan install . --output-folder=cmake-build-release --build=never -s build_type=Release -s "&:build_type=Release"
conan install . --output-folder=cmake-build-relwithdebinfo --build=missing -s build_type=RelWithDebInfo -s "&:build_type=RelWithDebInfo"
conan install . --output-folder=cmake-build-debug --build=missing -s build_type=RelWithDebInfo -s "&:build_type=Debug"
conan install . --output-folder=cmake-build-release --build=missing -s build_type=RelWithDebInfo -s "&:build_type=Release"
3 changes: 3 additions & 0 deletions src/odr/exceptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ UnknownFileType::UnknownFileType() : std::runtime_error("unknown file type") {}
/// Builds the exception and stores the offending type in `file_type`.
///
/// The message is "unsupported file type" so that `what()` can be told apart
/// from `UnknownFileType`, which reports "unknown file type".
///
/// @param file_type the file type that triggered the error.
UnsupportedFileType::UnsupportedFileType(const FileType file_type)
    : std::runtime_error("unsupported file type"), file_type{file_type} {}

UnknownDecoderEngine::UnknownDecoderEngine()
: std::runtime_error("unknown decoder engine") {}

FileReadError::FileReadError() : std::runtime_error("file read error") {}

FileWriteError::FileWriteError() : std::runtime_error("file write error") {}
Expand Down
5 changes: 5 additions & 0 deletions src/odr/exceptions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ struct UnsupportedFileType final : public std::runtime_error {
explicit UnsupportedFileType(FileType file_type);
};

/// @brief Unknown decoder engine exception
///
/// A `std::runtime_error` subtype; it carries no data besides the message
/// set by its constructor.
struct UnknownDecoderEngine final : public std::runtime_error {
/// Default-constructs the exception; the message is fixed by the
/// out-of-line definition.
UnknownDecoderEngine();
};

/// @brief File read error
struct FileReadError final : public std::runtime_error {
FileReadError();
Expand Down
45 changes: 40 additions & 5 deletions src/odr/file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include <odr/internal/abstract/file.hpp>
#include <odr/internal/common/file.hpp>
#include <odr/internal/open_strategy.hpp>
#include <odr/internal/pdf/pdf_file.hpp>

#include <optional>
#include <utility>
Expand Down Expand Up @@ -59,6 +58,12 @@ std::vector<FileType> DecodedFile::types(const std::string &path) {
std::make_shared<internal::common::DiskFile>(path));
}

std::vector<DecoderEngine> DecodedFile::engines(const std::string &path,
FileType as) {
return internal::open_strategy::engines(
std::make_shared<internal::common::DiskFile>(path), as);
}

/// Decodes the file at `path` and returns the type of the decoded result.
///
/// @param path filesystem path of the file to decode.
/// @return `file_type()` of a `DecodedFile` constructed from `path`.
FileType DecodedFile::type(const std::string &path) {
  const DecodedFile decoded(path);
  return decoded.file_type();
}
Expand All @@ -69,7 +74,7 @@ FileMeta DecodedFile::meta(const std::string &path) {

DecodedFile::DecodedFile(std::shared_ptr<internal::abstract::DecodedFile> impl)
: m_impl{std::move(impl)} {
if (!m_impl) {
if (m_impl == nullptr) {
throw UnknownFileType();
}
}
Expand All @@ -88,6 +93,11 @@ DecodedFile::DecodedFile(const std::string &path, FileType as)
: DecodedFile(internal::open_strategy::open_file(
std::make_shared<internal::common::DiskFile>(path), as)) {}

/// Decodes the file at `path` according to `preference`.
///
/// Wraps `path` in an `internal::common::DiskFile`, hands it to
/// `internal::open_strategy::open_file` together with `preference`, and
/// delegates to the implementation-pointer constructor with the result.
///
/// @param path       filesystem path of the file to decode.
/// @param preference decoding preference forwarded unchanged to the open
///                   strategy.
DecodedFile::DecodedFile(const std::string &path,
const DecodePreference &preference)
: DecodedFile(internal::open_strategy::open_file(
std::make_shared<internal::common::DiskFile>(path), preference)) {}

/// True when this object holds a non-null implementation pointer.
DecodedFile::operator bool() const {
  return static_cast<bool>(m_impl);
}

/// Returns the file type reported by the implementation.
/// Dereferences `m_impl` without a null check.
FileType DecodedFile::file_type() const noexcept { return m_impl->file_type(); }
Expand All @@ -98,6 +108,10 @@ FileCategory DecodedFile::file_category() const noexcept {

/// Returns the file metadata reported by the implementation.
/// Dereferences `m_impl` without a null check.
FileMeta DecodedFile::file_meta() const noexcept { return m_impl->file_meta(); }

/// Returns the decoder engine reported by the implementation.
/// Dereferences `m_impl` without a null check.
DecoderEngine DecodedFile::decoder_engine() const noexcept {
return m_impl->decoder_engine();
}

/// Wraps the implementation's `file()` result in a public `File` handle.
File DecodedFile::file() const { return File(m_impl->file()); }

bool DecodedFile::is_text_file() const {
Expand All @@ -121,7 +135,8 @@ bool DecodedFile::is_document_file() const {
}

bool DecodedFile::is_pdf_file() const {
return std::dynamic_pointer_cast<internal::pdf::PdfFile>(m_impl) != nullptr;
return std::dynamic_pointer_cast<internal::abstract::PdfFile>(m_impl) !=
nullptr;
}

TextFile DecodedFile::text_file() const {
Expand Down Expand Up @@ -158,7 +173,7 @@ DocumentFile DecodedFile::document_file() const {

PdfFile DecodedFile::pdf_file() const {
if (auto pdf_file =
std::dynamic_pointer_cast<internal::pdf::PdfFile>(m_impl)) {
std::dynamic_pointer_cast<internal::abstract::PdfFile>(m_impl)) {
return PdfFile(pdf_file);
}
throw NoPdfFile();
Expand Down Expand Up @@ -229,7 +244,27 @@ DocumentMeta DocumentFile::document_meta() const {

/// Wraps the implementation's `document()` result in a public `Document`.
Document DocumentFile::document() const { return Document(m_impl->document()); }

PdfFile::PdfFile(std::shared_ptr<internal::pdf::PdfFile> impl)
/// Returns a copy of the shared pointer to the underlying implementation,
/// giving callers direct access to the internal document file object.
std::shared_ptr<internal::abstract::DocumentFile> DocumentFile::impl() const {
return m_impl;
}

/// Constructs a PDF file wrapper around `impl`.
///
/// `impl` is first copied into the `DecodedFile` base and then moved into
/// this class's own `m_impl`, so both refer to the same object.
PdfFile::PdfFile(std::shared_ptr<internal::abstract::PdfFile> impl)
: DecodedFile(impl), m_impl{std::move(impl)} {}

/// Forwards to the implementation's `password_encrypted()` and returns its
/// result unchanged.
bool PdfFile::password_encrypted() const {
return m_impl->password_encrypted();
}

/// Forwards to the implementation's `encryption_state()` and returns its
/// result unchanged.
EncryptionState PdfFile::encryption_state() const {
return m_impl->encryption_state();
}

/// Forwards `password` to the implementation's `decrypt()`.
///
/// @param password password handed to the implementation.
/// @return whatever the implementation returns (presumably true on
///         success — confirm against the implementation).
bool PdfFile::decrypt(const std::string &password) {
return m_impl->decrypt(password);
}

/// Returns a copy of the shared pointer to the underlying implementation,
/// giving callers direct access to the internal PDF file object.
std::shared_ptr<internal::abstract::PdfFile> PdfFile::impl() const {
return m_impl;
}

} // namespace odr
39 changes: 32 additions & 7 deletions src/odr/file.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,8 @@ class TextFile;
class ImageFile;
class ArchiveFile;
class DocumentFile;
} // namespace odr::internal::abstract

namespace odr::internal::pdf {
class PdfFile;
}
} // namespace odr::internal::abstract

namespace odr {
class TextFile;
Expand Down Expand Up @@ -97,6 +94,22 @@ enum class FileLocation {
disk,
};

/// @brief Collection of decoder engines.
///
/// Names the backend associated with a decoded file; reported by
/// `DecodedFile::decoder_engine()` and selectable through
/// `DecodePreference`.
enum class DecoderEngine {
// NOTE(review): presumably the library's built-in decoders — confirm.
odr,
// NOTE(review): presumably the poppler-backed PDF decoder — confirm.
poppler,
// NOTE(review): presumably the wvWare-backed decoder — confirm.
wvware,
};

/// @brief Preference for decoding files.
///
/// Passed to `DecodedFile(const std::string &, const DecodePreference &)`.
/// NOTE(review): the exact way each field is interpreted is defined by the
/// open strategy, which is not visible here; the field comments below are
/// inferred from the names and should be confirmed.
struct DecodePreference final {
// Presumably: force decoding as this file type when set.
std::optional<FileType> as_file_type;
// Presumably: force this decoder engine when set.
std::optional<DecoderEngine> with_engine;

// Presumably: file types to try, in order of preference.
std::vector<FileType> file_type_priority;
// Presumably: decoder engines to try, in order of preference.
std::vector<DecoderEngine> engine_priority;
};

/// @brief Collection of encryption states.
enum class EncryptionState {
unknown,
Expand Down Expand Up @@ -163,20 +176,24 @@ class File final {
class DecodedFile {
public:
static std::vector<FileType> types(const std::string &path);
static std::vector<DecoderEngine> engines(const std::string &path,
FileType as);
static FileType type(const std::string &path);
static FileMeta meta(const std::string &path);

explicit DecodedFile(std::shared_ptr<internal::abstract::DecodedFile>);
explicit DecodedFile(std::shared_ptr<internal::abstract::DecodedFile> impl);
explicit DecodedFile(const File &file);
DecodedFile(const File &file, FileType as);
explicit DecodedFile(const std::string &path);
DecodedFile(const std::string &path, FileType as);
DecodedFile(const std::string &path, const DecodePreference &preference);

[[nodiscard]] explicit operator bool() const;

[[nodiscard]] FileType file_type() const noexcept;
[[nodiscard]] FileCategory file_category() const noexcept;
[[nodiscard]] FileMeta file_meta() const noexcept;
[[nodiscard]] DecoderEngine decoder_engine() const noexcept;

[[nodiscard]] File file() const;

Expand Down Expand Up @@ -250,17 +267,25 @@ class DocumentFile final : public DecodedFile {

[[nodiscard]] Document document() const;

[[nodiscard]] std::shared_ptr<internal::abstract::DocumentFile> impl() const;

private:
std::shared_ptr<internal::abstract::DocumentFile> m_impl;
};

/// @brief Represents a PDF file.
class PdfFile final : public DecodedFile {
public:
explicit PdfFile(std::shared_ptr<internal::pdf::PdfFile>);
explicit PdfFile(std::shared_ptr<internal::abstract::PdfFile>);

[[nodiscard]] bool password_encrypted() const;
[[nodiscard]] EncryptionState encryption_state() const;
bool decrypt(const std::string &password);

[[nodiscard]] std::shared_ptr<internal::abstract::PdfFile> impl() const;

private:
std::shared_ptr<internal::pdf::PdfFile> m_impl;
std::shared_ptr<internal::abstract::PdfFile> m_impl;
};

} // namespace odr
Expand Down
54 changes: 44 additions & 10 deletions src/odr/html.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@
#include <odr/internal/html/document.hpp>
#include <odr/internal/html/filesystem.hpp>
#include <odr/internal/html/image_file.hpp>
#include <odr/internal/html/pdf2htmlex_wrapper.hpp>
#include <odr/internal/html/pdf_file.hpp>
#include <odr/internal/html/text_file.hpp>
#include <odr/internal/html/wvware_wrapper.hpp>
#include <odr/internal/oldms_wvware/wvware_oldms_file.hpp>
#include <odr/internal/pdf_poppler/poppler_pdf_file.hpp>

#include <filesystem>

Expand Down Expand Up @@ -73,11 +77,9 @@ Html html::translate(const DecodedFile &decoded_file,
} else if (decoded_file.is_image_file()) {
return translate(decoded_file.image_file(), output_path, config);
} else if (decoded_file.is_archive_file()) {
return translate(decoded_file.archive_file().archive(), output_path,
config);
return translate(decoded_file.archive_file(), output_path, config);
} else if (decoded_file.is_document_file()) {
return translate(decoded_file.document_file().document(), output_path,
config);
return translate(decoded_file.document_file(), output_path, config);
} else if (decoded_file.is_pdf_file()) {
return translate(decoded_file.pdf_file(), output_path, config);
}
Expand All @@ -97,6 +99,44 @@ Html html::translate(const ImageFile &image_file,
return internal::html::translate_image_file(image_file, output_path, config);
}

/// Translates an archive file to HTML by extracting its archive and
/// delegating to the `Archive` overload.
///
/// @param archive_file decoded archive file to translate.
/// @param output_path  output location forwarded to the `Archive` overload.
/// @param config       HTML configuration forwarded unchanged.
/// @return the result of the `Archive` overload.
Html html::translate(const ArchiveFile &archive_file,
                     const std::string &output_path, const HtmlConfig &config) {
  const auto archive = archive_file.archive();
  return translate(archive, output_path, config);
}

/// Translates a document file to HTML.
///
/// When built with `ODR_WITH_WVWARE` and the underlying implementation is an
/// `internal::WvWareLegacyMicrosoftFile`, the output directory is created and
/// the wvWare translation routine is used. In every other case the call is
/// delegated to the `Document` overload.
///
/// @param document_file decoded document file to translate.
/// @param output_path   output location for the generated HTML.
/// @param config        HTML configuration forwarded unchanged.
Html html::translate(const DocumentFile &document_file,
                     const std::string &output_path, const HtmlConfig &config) {
  const auto impl = document_file.impl();

#ifdef ODR_WITH_WVWARE
  const auto wvware_file =
      std::dynamic_pointer_cast<internal::WvWareLegacyMicrosoftFile>(impl);
  if (wvware_file != nullptr) {
    fs::create_directories(output_path);
    return internal::html::translate_wvware_oldms_file(*wvware_file,
                                                       output_path, config);
  }
#endif

  const auto document = document_file.document();
  return translate(document, output_path, config);
}

/// Translates a PDF file to HTML.
///
/// When built with `ODR_WITH_PDF2HTMLEX` and the underlying implementation is
/// an `internal::PopplerPdfFile`, the poppler translation routine is used;
/// otherwise `internal::html::translate_pdf_file` handles it.
///
/// The output directory is created before either routine runs. Previously
/// only the poppler branch created it, so the fallback path was handed a
/// directory that might not exist.
///
/// @param pdf_file    decoded PDF file to translate.
/// @param output_path output location; created if it does not exist.
/// @param config      HTML configuration forwarded unchanged.
Html html::translate(const PdfFile &pdf_file, const std::string &output_path,
                     const HtmlConfig &config) {
  // Both translation routines are handed `output_path`; make sure it exists
  // no matter which one is selected. This is a no-op if it already exists.
  fs::create_directories(output_path);

#ifdef ODR_WITH_PDF2HTMLEX
  if (const auto poppler_pdf_file =
          std::dynamic_pointer_cast<internal::PopplerPdfFile>(
              pdf_file.impl())) {
    return internal::html::translate_poppler_pdf_file(*poppler_pdf_file,
                                                      output_path, config);
  }
#endif

  return internal::html::translate_pdf_file(pdf_file, output_path, config);
}

Html html::translate(const Archive &archive, const std::string &output_path,
const HtmlConfig &config) {
fs::create_directories(output_path);
Expand All @@ -110,12 +150,6 @@ Html html::translate(const Document &document, const std::string &output_path,
return internal::html::translate_document(document, output_path, config);
}

Html html::translate(const PdfFile &pdf_file, const std::string &output_path,
const HtmlConfig &config) {
fs::create_directories(output_path);
return internal::html::translate_pdf_file(pdf_file, output_path, config);
}

void html::edit(const Document &document, const char *diff) {
auto json = nlohmann::json::parse(diff);
for (const auto &[key, value] : json["modifiedText"].items()) {
Expand Down
Loading

0 comments on commit 9ca86c4

Please sign in to comment.