Skip to content

Commit

Permalink
draft
Browse files Browse the repository at this point in the history
  • Loading branch information
andiwand committed Sep 16, 2024
1 parent 52d0fe9 commit 0cd4d85
Show file tree
Hide file tree
Showing 11 changed files with 277 additions and 27 deletions.
3 changes: 3 additions & 0 deletions src/odr/file.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ enum class DecoderEngine {

/// @brief Preference for decoding files.
///
/// The optional members pin a single choice, the vector members give an
/// ordered list of choices. `open_strategy::open_file` consults the vectors
/// only when the corresponding optional is empty.
struct DecodePreference final {
/// File type to probe first. When set, `file_type_priority` is not consulted.
std::optional<FileType> as_file_type;
/// Decoder engine to probe first. When set, `engine_priority` is not
/// consulted.
std::optional<DecoderEngine> with_engine;

/// File types to probe, in order, when `as_file_type` is empty.
std::vector<FileType> file_type_priority;
/// Decoder engines to probe, in order, when `with_engine` is empty.
std::vector<DecoderEngine> engine_priority;
};
Expand Down
12 changes: 11 additions & 1 deletion src/odr/html.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
#include <odr/internal/html/document.hpp>
#include <odr/internal/html/filesystem.hpp>
#include <odr/internal/html/image_file.hpp>
#include <odr/internal/html/pdf2htmlEX_wrapper.hpp>
#include <odr/internal/html/pdf_file.hpp>
#include <odr/internal/html/text_file.hpp>
#include <odr/internal/pdf_poppler/poppler_pdf_file.hpp>

#include <filesystem>

Expand Down Expand Up @@ -112,7 +114,15 @@ Html html::translate(const Document &document, const std::string &output_path,

/// @brief Translates a PDF file to HTML below `output_path`.
///
/// Dispatches on the concrete implementation behind `pdf_file`: a
/// `internal::PopplerPdfFile` is handed to `translate_poppler_pdf_file`, any
/// other implementation goes to `translate_pdf_file`.
///
/// @param pdf_file the decoded PDF file to translate
/// @param output_path directory the HTML output is written to
/// @param config HTML translation settings, forwarded unchanged
/// @return whatever the selected translator returns
Html html::translate(const PdfFile &pdf_file, const std::string &output_path,
const HtmlConfig &config) {
// NOTE(review): the output directory is created here and again inside the
// poppler branch below. This looks like the removed and the added line of the
// diff shown side by side - confirm which single call is intended.
fs::create_directories(output_path);
auto pdf_file_impl = pdf_file.impl();

// The downcast yields a null pointer for non-poppler implementations, in
// which case the branch is skipped.
if (auto poppler_pdf_file =
std::dynamic_pointer_cast<internal::PopplerPdfFile>(pdf_file_impl)) {
fs::create_directories(output_path);
return internal::html::translate_poppler_pdf_file(*poppler_pdf_file,
output_path, config);
}

// Fallback for every other PDF implementation.
return internal::html::translate_pdf_file(pdf_file, output_path, config);
}

Expand Down
2 changes: 1 addition & 1 deletion src/odr/internal/html/pdf2htmlEX_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

namespace odr::internal {

Html html::translate_pdf_poppler_file(const PopplerPdfFile &pdf_file,
Html html::translate_poppler_pdf_file(const PopplerPdfFile &pdf_file,
const std::string &output_path,
const HtmlConfig &config) {
PDFDoc &pdf_doc = pdf_file.pdf_doc();
Expand Down
2 changes: 1 addition & 1 deletion src/odr/internal/html/pdf2htmlEX_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class PopplerPdfFile;

namespace odr::internal::html {

/// @brief Translates a poppler-backed PDF file to HTML.
///
/// @param pdf_file PDF file that was decoded with the poppler engine
/// @param output_path location the HTML output is written to; the caller in
///                    `html::translate` creates this directory beforehand
/// @param config HTML translation settings
/// @return handle to the generated HTML
Html translate_poppler_pdf_file(const PopplerPdfFile &pdf_file,
                                const std::string &output_path,
                                const HtmlConfig &config);

Expand Down
227 changes: 223 additions & 4 deletions src/odr/internal/open_strategy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <odr/internal/oldms/oldms_file.hpp>
#include <odr/internal/ooxml/ooxml_file.hpp>
#include <odr/internal/pdf/pdf_file.hpp>
#include <odr/internal/pdf_poppler/poppler_pdf_file.hpp>
#include <odr/internal/svm/svm_file.hpp>
#include <odr/internal/zip/zip_file.hpp>

Expand Down Expand Up @@ -93,6 +94,26 @@ open_strategy::types(const std::shared_ptr<abstract::File> &file) {
return result;
}

/// @brief Lists the decoder engines worth probing for a given file type.
///
/// The built-in `odr` engine is always the first candidate. Legacy Microsoft
/// types additionally get `wv_ware`, and PDF additionally gets `poppler`.
///
/// @param file the file to decode (currently not inspected)
/// @param as the file type the file is going to be decoded as
/// @return candidate engines in probing order
std::vector<DecoderEngine>
open_strategy::engines(const std::shared_ptr<abstract::File> &file,
                       FileType as) {
  std::vector<DecoderEngine> candidates{DecoderEngine::odr};

  switch (as) {
  case FileType::legacy_word_document:
  case FileType::legacy_powerpoint_presentation:
  case FileType::legacy_excel_worksheets:
    candidates.push_back(DecoderEngine::wv_ware);
    break;
  case FileType::portable_document_format:
    candidates.push_back(DecoderEngine::poppler);
    break;
  default:
    // Every other type is only handled by the built-in engine.
    break;
  }

  return candidates;
}

std::unique_ptr<abstract::DecodedFile>
open_strategy::open_file(std::shared_ptr<abstract::File> file) {
auto file_type = magic::file_type(*file);
Expand Down Expand Up @@ -167,10 +188,208 @@ open_strategy::open_file(std::shared_ptr<abstract::File> file) {
}

std::unique_ptr<abstract::DecodedFile>
open_strategy::open_file(std::shared_ptr<abstract::File> /*file*/,
const FileType /*as*/) {
// TODO implement
throw UnknownFileType();
open_strategy::open_file(std::shared_ptr<abstract::File> file, FileType as) {
DecodePreference preference;
preference.as_file_type = as;
return open_file(file, preference);
}

/// @brief Attempts to decode `file` as exactly one file type with exactly one
/// decoder engine.
///
/// This is a single probe used by the preference-based overload. A failing
/// decoder is an expected outcome while probing, so any exception thrown by a
/// decoder is deliberately swallowed and reported as `nullptr`.
///
/// Office Open XML documents, presentations and workbooks are ZIP packages and
/// are therefore tried through the ZIP reader first. The compound-file (CFB)
/// reader is kept as a fallback for them and is the only container tried for
/// `office_open_xml_encrypted`.
///
/// @param file the raw file to decode
/// @param as the file type to decode the file as
/// @param with the engine to decode with
/// @return the decoded file, or `nullptr` if the type/engine combination is
///         not supported or decoding failed
std::unique_ptr<abstract::DecodedFile>
open_strategy::open_file(std::shared_ptr<abstract::File> file, FileType as,
                         DecoderEngine with) {
  try {
    // Poppler only knows how to decode PDF.
    if (with == DecoderEngine::poppler) {
      if (as != FileType::portable_document_format) {
        return nullptr;
      }
      auto memory_file = std::make_shared<common::MemoryFile>(*file);
      return std::make_unique<PopplerPdfFile>(memory_file);
    }

    // No other external engine is wired up here.
    if (with != DecoderEngine::odr) {
      return nullptr;
    }

    switch (as) {
    case FileType::opendocument_text:
    case FileType::opendocument_presentation:
    case FileType::opendocument_spreadsheet:
    case FileType::opendocument_graphics: {
      auto memory_file = std::make_shared<common::MemoryFile>(*file);
      auto zip_file = std::make_unique<zip::ZipFile>(std::move(memory_file));
      auto filesystem = zip_file->archive()->filesystem();
      return std::make_unique<odf::OpenDocumentFile>(filesystem);
    }

    case FileType::office_open_xml_document:
    case FileType::office_open_xml_presentation:
    case FileType::office_open_xml_workbook: {
      // Plain OOXML is a ZIP package; a failure here falls through to the CFB
      // attempt below instead of aborting the probe.
      try {
        auto memory_file = std::make_shared<common::MemoryFile>(*file);
        auto zip_file = std::make_unique<zip::ZipFile>(std::move(memory_file));
        auto filesystem = zip_file->archive()->filesystem();
        return std::make_unique<ooxml::OfficeOpenXmlFile>(filesystem);
      } catch (...) {
      }
    }
      [[fallthrough]];
    case FileType::office_open_xml_encrypted: {
      auto memory_file = std::make_shared<common::MemoryFile>(*file);
      auto cfb_file = std::make_unique<cfb::CfbFile>(std::move(memory_file));
      auto filesystem = cfb_file->archive()->filesystem();
      return std::make_unique<ooxml::OfficeOpenXmlFile>(filesystem);
    }

    // Legacy PowerPoint is listed next to Word and Excel so that this
    // function agrees with the legacy types handled by `engines()`.
    case FileType::legacy_word_document:
    case FileType::legacy_powerpoint_presentation:
    case FileType::legacy_excel_worksheets: {
      auto memory_file = std::make_shared<common::MemoryFile>(*file);
      auto cfb_file = std::make_unique<cfb::CfbFile>(std::move(memory_file));
      auto filesystem = cfb_file->archive()->filesystem();
      return std::make_unique<oldms::LegacyMicrosoftFile>(filesystem);
    }

    case FileType::portable_document_format:
      return std::make_unique<PdfFile>(file);

    case FileType::portable_network_graphics:
    case FileType::graphics_interchange_format:
    case FileType::jpeg:
    case FileType::bitmap_image_file:
      return std::make_unique<common::ImageFile>(file, as);

    case FileType::starview_metafile: {
      auto memory_file = std::make_shared<common::MemoryFile>(*file);
      return std::make_unique<svm::SvmFile>(memory_file);
    }

    case FileType::text_file:
      return std::make_unique<text::TextFile>(file);

    case FileType::comma_separated_values: {
      auto text = std::make_shared<text::TextFile>(file);
      return std::make_unique<csv::CsvFile>(text);
    }

    case FileType::javascript_object_notation: {
      auto text = std::make_shared<text::TextFile>(file);
      return std::make_unique<json::JsonFile>(text);
    }

    case FileType::zip: {
      auto memory_file = std::make_shared<common::MemoryFile>(*file);
      return std::make_unique<zip::ZipFile>(memory_file);
    }

    case FileType::compound_file_binary_format: {
      auto memory_file = std::make_shared<common::MemoryFile>(*file);
      return std::make_unique<cfb::CfbFile>(memory_file);
    }

    default:
      return nullptr;
    }
  } catch (...) {
    // Best-effort probe: a decoder that rejects the file simply means "not
    // this type/engine", so the caller can move on to the next candidate.
  }

  return nullptr;
}

/// @brief Opens `file` by probing file types and decoder engines in the order
/// given by `preference`.
///
/// Probe order for file types: `preference.as_file_type` if set, otherwise
/// `preference.file_type_priority`; followed by the types detected by
/// `types(file)`. Probe order for engines (per type): `preference.with_engine`
/// if set, otherwise `preference.engine_priority`; followed by the engines
/// returned by `engines(file, as)`. The first combination that decodes wins.
///
/// NOTE(review): detected types/engines are appended even when
/// `as_file_type` / `with_engine` is set, so the result may be of a different
/// type than requested - confirm that this fallback is intended.
///
/// @param file the raw file to decode
/// @param preference preferred file types and engines
/// @return the decoded file, or `nullptr` if no combination succeeded
std::unique_ptr<abstract::DecodedFile>
open_strategy::open_file(std::shared_ptr<abstract::File> file,
                         const DecodePreference &preference) {
  // Appends every candidate that is not yet present, keeping first-seen order.
  // `std::unique` is not suitable here: it only removes *adjacent* duplicates
  // and sorting first would destroy the priority order.
  const auto append_unique = [](auto &target, const auto &candidates) {
    for (const auto &candidate : candidates) {
      if (std::find(target.begin(), target.end(), candidate) == target.end()) {
        target.push_back(candidate);
      }
    }
  };

  std::vector<FileType> probe_types;
  if (preference.as_file_type.has_value()) {
    probe_types.push_back(*preference.as_file_type);
  } else {
    append_unique(probe_types, preference.file_type_priority);
  }
  append_unique(probe_types, types(file));

  for (const FileType as : probe_types) {
    std::vector<DecoderEngine> probe_engines;
    if (preference.with_engine.has_value()) {
      probe_engines.push_back(*preference.with_engine);
    } else {
      append_unique(probe_engines, preference.engine_priority);
    }
    append_unique(probe_engines, engines(file, as));

    for (const DecoderEngine with : probe_engines) {
      if (auto decoded_file = open_file(file, as, with);
          decoded_file != nullptr) {
        return decoded_file;
      }
    }
  }

  return nullptr;
}

std::unique_ptr<abstract::DocumentFile>
Expand Down
8 changes: 3 additions & 5 deletions src/odr/internal/open_strategy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// Forward declarations of the public odr types referenced by the signatures
// in this header.
namespace odr {
enum class FileType;
enum class DecoderEngine;
// NOTE(review): the struct defined in odr/file.hpp is named `DecodePreference`;
// `DecodePreferences` looks like the pre-rename line of this diff - confirm it
// can be dropped.
struct DecodePreferences;
struct DecodePreference;
} // namespace odr

namespace odr::internal::abstract {
Expand All @@ -30,15 +30,13 @@ std::unique_ptr<internal::abstract::DecodedFile>
open_file(std::shared_ptr<internal::abstract::File> file);
std::unique_ptr<internal::abstract::DecodedFile>
open_file(std::shared_ptr<internal::abstract::File> file, FileType as);

std::unique_ptr<internal::abstract::DecodedFile>
open_file(std::shared_ptr<internal::abstract::File> file, FileType as,
DecoderEngine with);
/// @brief Opens `file` by probing file types and decoder engines according to
/// `preference`.
///
/// @param file the raw file to decode
/// @param preference preferred file types and decoder engines
/// @return the decoded file, or `nullptr` if no probe succeeded
std::unique_ptr<internal::abstract::DecodedFile>
open_file(std::shared_ptr<internal::abstract::File> file,
          const DecodePreference &preference);

std::unique_ptr<internal::abstract::DocumentFile>
open_document_file(std::shared_ptr<internal::abstract::File> file);
Expand Down
16 changes: 8 additions & 8 deletions src/odr/internal/pdf/pdf_file.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,22 @@ namespace odr::internal {
// Takes shared ownership of the underlying raw file.
PdfFile::PdfFile(std::shared_ptr<abstract::File> file)
: m_file{std::move(file)} {}

// A PDF is always reported as a document.
FileCategory PdfFile::file_category() const noexcept {
return FileCategory::document;
}

// Returns the raw file this object was constructed with.
std::shared_ptr<abstract::File> PdfFile::file() const noexcept {
return m_file;
}

// The file type is fixed to PDF.
FileType PdfFile::file_type() const noexcept {
return FileType::portable_document_format;
}

// Returns a default-constructed (empty) meta object.
FileMeta PdfFile::file_meta() const noexcept { return {}; }

// This implementation is the built-in `odr` decoder.
DecoderEngine PdfFile::decoder_engine() const noexcept {
return DecoderEngine::odr;
}

// Always reports "not password encrypted".
bool PdfFile::password_encrypted() const noexcept { return false; }

// Always reports `not_encrypted`.
EncryptionState PdfFile::encryption_state() const noexcept {
return EncryptionState::not_encrypted;
}

// Decryption is not implemented: the password is ignored and `false` is
// returned unconditionally.
bool PdfFile::decrypt(const std::string &) { return false; }

} // namespace odr::internal
8 changes: 5 additions & 3 deletions src/odr/internal/pdf/pdf_file.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,19 @@

namespace odr::internal {

// PDF file decoded by the built-in engine. Holds a shared reference to the
// underlying raw file.
//
// NOTE(review): two class heads follow - they look like the removed and the
// added line of this diff (base class changed from `abstract::DecodedFile` to
// `abstract::PdfFile`, class made `final`). Confirm only the second remains.
class PdfFile : public abstract::DecodedFile {
class PdfFile final : public abstract::PdfFile {
public:
// Wraps the given raw file.
explicit PdfFile(std::shared_ptr<abstract::File> file);

// The raw file passed to the constructor.
[[nodiscard]] std::shared_ptr<abstract::File> file() const noexcept final;

// Static description of the file: type, category, meta data and the engine
// that decoded it.
[[nodiscard]] FileType file_type() const noexcept final;
[[nodiscard]] FileCategory file_category() const noexcept final;
[[nodiscard]] FileMeta file_meta() const noexcept final;
[[nodiscard]] DecoderEngine decoder_engine() const noexcept final;

// Encryption interface.
[[nodiscard]] bool password_encrypted() const noexcept final;
[[nodiscard]] EncryptionState encryption_state() const noexcept final;
[[nodiscard]] bool decrypt(const std::string &password) final;

private:
// Underlying raw file.
std::shared_ptr<abstract::File> m_file;
};
Expand Down
Loading

0 comments on commit 0cd4d85

Please sign in to comment.