From fa02f299248acd56ac99b23ac7a3f5953132864c Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 8 Aug 2024 00:26:49 +0300 Subject: [PATCH 01/97] Build with pdf2htmlEX --- .github/config/macos-13-clang-14/conan/profiles/default | 2 ++ .../macos-13-clang-14/conan/profiles/pdf2htmlEX-config | 6 ++++++ .github/config/macos-14-clang-14/conan/profiles/default | 2 ++ .../macos-14-clang-14/conan/profiles/pdf2htmlEX-config | 6 ++++++ .github/config/ubuntu-24.04-clang-16/conan/profiles/default | 2 ++ .../ubuntu-24.04-clang-16/conan/profiles/pdf2htmlEX-config | 6 ++++++ .github/config/ubuntu-24.04-gcc-12/conan/profiles/default | 2 ++ .../ubuntu-24.04-gcc-12/conan/profiles/pdf2htmlEX-config | 6 ++++++ .../config/windows-2022-msvc-1939/conan/profiles/default | 2 ++ .../windows-2022-msvc-1939/conan/profiles/pdf2htmlEX-config | 6 ++++++ conanfile.py | 1 + 11 files changed, 41 insertions(+) create mode 100644 .github/config/macos-13-clang-14/conan/profiles/pdf2htmlEX-config create mode 100644 .github/config/macos-14-clang-14/conan/profiles/pdf2htmlEX-config create mode 100644 .github/config/ubuntu-24.04-clang-16/conan/profiles/pdf2htmlEX-config create mode 100644 .github/config/ubuntu-24.04-gcc-12/conan/profiles/pdf2htmlEX-config create mode 100644 .github/config/windows-2022-msvc-1939/conan/profiles/pdf2htmlEX-config diff --git a/.github/config/macos-13-clang-14/conan/profiles/default b/.github/config/macos-13-clang-14/conan/profiles/default index 47c4a5f8..5a0f6cd2 100644 --- a/.github/config/macos-13-clang-14/conan/profiles/default +++ b/.github/config/macos-13-clang-14/conan/profiles/default @@ -1,3 +1,5 @@ +include(pdf2htmlEX-config) + [settings] arch=x86_64 build_type=Release diff --git a/.github/config/macos-13-clang-14/conan/profiles/pdf2htmlEX-config b/.github/config/macos-13-clang-14/conan/profiles/pdf2htmlEX-config new file mode 100644 index 00000000..10f7c1f9 --- /dev/null +++ b/.github/config/macos-13-clang-14/conan/profiles/pdf2htmlEX-config @@ -0,0 +1,6 @@ 
+[options] +# @TODO: Fix linker errors caused by the absense of these options +# Fontforge build failure if GLib built with mount +glib/*:with_mount=False +# Fontforge build failure if FreeType built with Brotli +freetype/*:with_brotli=False diff --git a/.github/config/macos-14-clang-14/conan/profiles/default b/.github/config/macos-14-clang-14/conan/profiles/default index 47c4a5f8..5a0f6cd2 100644 --- a/.github/config/macos-14-clang-14/conan/profiles/default +++ b/.github/config/macos-14-clang-14/conan/profiles/default @@ -1,3 +1,5 @@ +include(pdf2htmlEX-config) + [settings] arch=x86_64 build_type=Release diff --git a/.github/config/macos-14-clang-14/conan/profiles/pdf2htmlEX-config b/.github/config/macos-14-clang-14/conan/profiles/pdf2htmlEX-config new file mode 100644 index 00000000..10f7c1f9 --- /dev/null +++ b/.github/config/macos-14-clang-14/conan/profiles/pdf2htmlEX-config @@ -0,0 +1,6 @@ +[options] +# @TODO: Fix linker errors caused by the absense of these options +# Fontforge build failure if GLib built with mount +glib/*:with_mount=False +# Fontforge build failure if FreeType built with Brotli +freetype/*:with_brotli=False diff --git a/.github/config/ubuntu-24.04-clang-16/conan/profiles/default b/.github/config/ubuntu-24.04-clang-16/conan/profiles/default index d452f6fd..ea6a86be 100644 --- a/.github/config/ubuntu-24.04-clang-16/conan/profiles/default +++ b/.github/config/ubuntu-24.04-clang-16/conan/profiles/default @@ -1,3 +1,5 @@ +include(pdf2htmlEX-config) + [settings] arch=x86_64 build_type=Release diff --git a/.github/config/ubuntu-24.04-clang-16/conan/profiles/pdf2htmlEX-config b/.github/config/ubuntu-24.04-clang-16/conan/profiles/pdf2htmlEX-config new file mode 100644 index 00000000..10f7c1f9 --- /dev/null +++ b/.github/config/ubuntu-24.04-clang-16/conan/profiles/pdf2htmlEX-config @@ -0,0 +1,6 @@ +[options] +# @TODO: Fix linker errors caused by the absense of these options +# Fontforge build failure if GLib built with mount +glib/*:with_mount=False 
+# Fontforge build failure if FreeType built with Brotli +freetype/*:with_brotli=False diff --git a/.github/config/ubuntu-24.04-gcc-12/conan/profiles/default b/.github/config/ubuntu-24.04-gcc-12/conan/profiles/default index efe73c3a..10a6a6c1 100644 --- a/.github/config/ubuntu-24.04-gcc-12/conan/profiles/default +++ b/.github/config/ubuntu-24.04-gcc-12/conan/profiles/default @@ -1,3 +1,5 @@ +include(pdf2htmlEX-config) + [settings] arch=x86_64 build_type=Release diff --git a/.github/config/ubuntu-24.04-gcc-12/conan/profiles/pdf2htmlEX-config b/.github/config/ubuntu-24.04-gcc-12/conan/profiles/pdf2htmlEX-config new file mode 100644 index 00000000..10f7c1f9 --- /dev/null +++ b/.github/config/ubuntu-24.04-gcc-12/conan/profiles/pdf2htmlEX-config @@ -0,0 +1,6 @@ +[options] +# @TODO: Fix linker errors caused by the absense of these options +# Fontforge build failure if GLib built with mount +glib/*:with_mount=False +# Fontforge build failure if FreeType built with Brotli +freetype/*:with_brotli=False diff --git a/.github/config/windows-2022-msvc-1939/conan/profiles/default b/.github/config/windows-2022-msvc-1939/conan/profiles/default index 4bfbb647..59199190 100644 --- a/.github/config/windows-2022-msvc-1939/conan/profiles/default +++ b/.github/config/windows-2022-msvc-1939/conan/profiles/default @@ -1,3 +1,5 @@ +include(pdf2htmlEX-config) + [settings] arch=x86_64 build_type=Release diff --git a/.github/config/windows-2022-msvc-1939/conan/profiles/pdf2htmlEX-config b/.github/config/windows-2022-msvc-1939/conan/profiles/pdf2htmlEX-config new file mode 100644 index 00000000..10f7c1f9 --- /dev/null +++ b/.github/config/windows-2022-msvc-1939/conan/profiles/pdf2htmlEX-config @@ -0,0 +1,6 @@ +[options] +# @TODO: Fix linker errors caused by the absense of these options +# Fontforge build failure if GLib built with mount +glib/*:with_mount=False +# Fontforge build failure if FreeType built with Brotli +freetype/*:with_brotli=False diff --git a/conanfile.py b/conanfile.py index 
9b858f32..59f02800 100644 --- a/conanfile.py +++ b/conanfile.py @@ -34,6 +34,7 @@ def requirements(self): self.requires("vincentlaucsb-csv-parser/2.1.3") self.requires("uchardet/0.0.7") self.requires("utfcpp/4.0.4") + self.requires("pdf2htmlex/0.18.8.rc1-20240805-git") def build_requirements(self): self.test_requires("gtest/1.14.0") From ce3d37645ddcea6b66c521c81e9782b336a87d0c Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 8 Aug 2024 00:46:28 +0300 Subject: [PATCH 02/97] Macos-14 image is actually arm64, not x86_64. Fix conan profile --- .github/config/macos-14-clang-14/conan/profiles/default | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/config/macos-14-clang-14/conan/profiles/default b/.github/config/macos-14-clang-14/conan/profiles/default index 5a0f6cd2..92f364e4 100644 --- a/.github/config/macos-14-clang-14/conan/profiles/default +++ b/.github/config/macos-14-clang-14/conan/profiles/default @@ -1,7 +1,7 @@ include(pdf2htmlEX-config) [settings] -arch=x86_64 +arch=armv8 build_type=Release compiler=apple-clang compiler.version=14 From cf15af399fe2cd929117bf975629373db05b2c3c Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 8 Aug 2024 01:14:44 +0300 Subject: [PATCH 03/97] Add odr to default remote position, not 0. 
This may reduce some load on on odr artifactory --- .github/workflows/build_test.yml | 4 ++-- .github/workflows/publish.yml | 2 +- .github/workflows/tidy.yml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index 2fd163bf..ed90d590 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -50,7 +50,7 @@ jobs: run: pip install conan - name: conan remote - run: conan remote add --index 0 odr https://artifactory.opendocument.app/artifactory/api/conan/conan + run: conan remote add odr https://artifactory.opendocument.app/artifactory/api/conan/conan - name: conan login run: conan remote login odr admin --password ${{ secrets.ARTIFACTORY }} - name: conan config @@ -247,7 +247,7 @@ jobs: run: pip install conan - name: conan remote - run: conan remote add --index 0 odr https://artifactory.opendocument.app/artifactory/api/conan/conan + run: conan remote add odr https://artifactory.opendocument.app/artifactory/api/conan/conan - name: conan config run: conan config install .github/config/${{ matrix.os }}-${{ matrix.compiler }}/conan diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index fff0242a..abc1b16b 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -24,7 +24,7 @@ jobs: run: pip install --upgrade pip conan - name: conan remote - run: conan remote add --index 0 odr https://artifactory.opendocument.app/artifactory/api/conan/conan + run: conan remote add odr https://artifactory.opendocument.app/artifactory/api/conan/conan - name: conan login run: conan remote login odr admin --password ${{ secrets.ARTIFACTORY }} - name: conan config diff --git a/.github/workflows/tidy.yml b/.github/workflows/tidy.yml index 0038e522..30d4627a 100644 --- a/.github/workflows/tidy.yml +++ b/.github/workflows/tidy.yml @@ -32,7 +32,7 @@ jobs: run: pip install --upgrade pip conan - name: conan remote - run: conan remote add 
--index 0 odr https://artifactory.opendocument.app/artifactory/api/conan/conan + run: conan remote add odr https://artifactory.opendocument.app/artifactory/api/conan/conan - name: conan config run: conan config install .github/config/${{ matrix.os }}-${{ matrix.compiler }}/conan - name: conan install From 21d90437195d70cecc7c4a35138c02d70cdb1fe5 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 8 Aug 2024 02:53:36 +0300 Subject: [PATCH 04/97] Use msvc-19.40 compiler instead of 19.39 --- .../conan/profiles/default | 2 +- .../conan/profiles/pdf2htmlEX-config | 0 .github/workflows/build_test.yml | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename .github/config/{windows-2022-msvc-1939 => windows-2022-msvc-1940}/conan/profiles/default (86%) rename .github/config/{windows-2022-msvc-1939 => windows-2022-msvc-1940}/conan/profiles/pdf2htmlEX-config (100%) diff --git a/.github/config/windows-2022-msvc-1939/conan/profiles/default b/.github/config/windows-2022-msvc-1940/conan/profiles/default similarity index 86% rename from .github/config/windows-2022-msvc-1939/conan/profiles/default rename to .github/config/windows-2022-msvc-1940/conan/profiles/default index 59199190..0283073c 100644 --- a/.github/config/windows-2022-msvc-1939/conan/profiles/default +++ b/.github/config/windows-2022-msvc-1940/conan/profiles/default @@ -4,7 +4,7 @@ include(pdf2htmlEX-config) arch=x86_64 build_type=Release compiler=msvc -compiler.version=193 +compiler.version=194 compiler.cppstd=20 compiler.runtime=dynamic os=Windows diff --git a/.github/config/windows-2022-msvc-1939/conan/profiles/pdf2htmlEX-config b/.github/config/windows-2022-msvc-1940/conan/profiles/pdf2htmlEX-config similarity index 100% rename from .github/config/windows-2022-msvc-1939/conan/profiles/pdf2htmlEX-config rename to .github/config/windows-2022-msvc-1940/conan/profiles/pdf2htmlEX-config diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index ed90d590..4d2eb415 100644 --- 
a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -26,7 +26,7 @@ jobs: - { os: ubuntu-24.04, compiler: gcc-12 } - { os: macos-13, compiler: clang-14 } - { os: macos-14, compiler: clang-14 } - - { os: windows-2022, compiler: msvc-1939 } + - { os: windows-2022, compiler: msvc-1940 } steps: - name: checkout uses: actions/checkout@v4 From 2a5baf14df90eb0679cebb102f5aad836e46c1fa Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 8 Aug 2024 03:45:08 +0300 Subject: [PATCH 05/97] Disable msvc compiler --- .github/workflows/build_test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index 4d2eb415..569815fb 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -26,7 +26,6 @@ jobs: - { os: ubuntu-24.04, compiler: gcc-12 } - { os: macos-13, compiler: clang-14 } - { os: macos-14, compiler: clang-14 } - - { os: windows-2022, compiler: msvc-1940 } steps: - name: checkout uses: actions/checkout@v4 @@ -214,6 +213,7 @@ jobs: build/test/output/odr-private/output build-test-downstream: + needs: build runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -223,7 +223,6 @@ jobs: - { os: ubuntu-24.04, compiler: gcc-12 } - { os: macos-13, compiler: clang-14 } - { os: macos-14, compiler: clang-14 } - - { os: windows-2022, compiler: msvc-1939 } steps: - name: checkout uses: actions/checkout@v4 From e38d0c8cd8f75d7e4dba198262a62357cff413aa Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 8 Aug 2024 17:03:44 +0300 Subject: [PATCH 06/97] Revert "Disable msvc compiler" This reverts commit 2a5baf14df90eb0679cebb102f5aad836e46c1fa. 
--- .github/workflows/build_test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index 569815fb..4d2eb415 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -26,6 +26,7 @@ jobs: - { os: ubuntu-24.04, compiler: gcc-12 } - { os: macos-13, compiler: clang-14 } - { os: macos-14, compiler: clang-14 } + - { os: windows-2022, compiler: msvc-1940 } steps: - name: checkout uses: actions/checkout@v4 @@ -213,7 +214,6 @@ jobs: build/test/output/odr-private/output build-test-downstream: - needs: build runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -223,6 +223,7 @@ jobs: - { os: ubuntu-24.04, compiler: gcc-12 } - { os: macos-13, compiler: clang-14 } - { os: macos-14, compiler: clang-14 } + - { os: windows-2022, compiler: msvc-1939 } steps: - name: checkout uses: actions/checkout@v4 From 0176499b3c33e59d9084cd0fd8401a63e52bcd27 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 8 Aug 2024 17:04:55 +0300 Subject: [PATCH 07/97] Don't depend on pdf2htmlEX when building for Windows --- conanfile.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conanfile.py b/conanfile.py index 59f02800..f42c40bd 100644 --- a/conanfile.py +++ b/conanfile.py @@ -34,7 +34,8 @@ def requirements(self): self.requires("vincentlaucsb-csv-parser/2.1.3") self.requires("uchardet/0.0.7") self.requires("utfcpp/4.0.4") - self.requires("pdf2htmlex/0.18.8.rc1-20240805-git") + if self.settings.os != "Windows": + self.requires("pdf2htmlex/0.18.8.rc1-20240805-git") def build_requirements(self): self.test_requires("gtest/1.14.0") From 75b9f5648cf64b182e9aa6e6c6da718c029d41eb Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 8 Aug 2024 17:08:20 +0300 Subject: [PATCH 08/97] Revert msvc compiler to 19.39. 
Remove pdf2htmlEX config from it's conan profile --- .../conan/profiles/default | 4 +--- .../windows-2022-msvc-1940/conan/profiles/pdf2htmlEX-config | 6 ------ .github/workflows/build_test.yml | 2 +- 3 files changed, 2 insertions(+), 10 deletions(-) rename .github/config/{windows-2022-msvc-1940 => windows-2022-msvc-1939}/conan/profiles/default (69%) delete mode 100644 .github/config/windows-2022-msvc-1940/conan/profiles/pdf2htmlEX-config diff --git a/.github/config/windows-2022-msvc-1940/conan/profiles/default b/.github/config/windows-2022-msvc-1939/conan/profiles/default similarity index 69% rename from .github/config/windows-2022-msvc-1940/conan/profiles/default rename to .github/config/windows-2022-msvc-1939/conan/profiles/default index 0283073c..4bfbb647 100644 --- a/.github/config/windows-2022-msvc-1940/conan/profiles/default +++ b/.github/config/windows-2022-msvc-1939/conan/profiles/default @@ -1,10 +1,8 @@ -include(pdf2htmlEX-config) - [settings] arch=x86_64 build_type=Release compiler=msvc -compiler.version=194 +compiler.version=193 compiler.cppstd=20 compiler.runtime=dynamic os=Windows diff --git a/.github/config/windows-2022-msvc-1940/conan/profiles/pdf2htmlEX-config b/.github/config/windows-2022-msvc-1940/conan/profiles/pdf2htmlEX-config deleted file mode 100644 index 10f7c1f9..00000000 --- a/.github/config/windows-2022-msvc-1940/conan/profiles/pdf2htmlEX-config +++ /dev/null @@ -1,6 +0,0 @@ -[options] -# @TODO: Fix linker errors caused by the absense of these options -# Fontforge build failure if GLib built with mount -glib/*:with_mount=False -# Fontforge build failure if FreeType built with Brotli -freetype/*:with_brotli=False diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index 4d2eb415..ed90d590 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -26,7 +26,7 @@ jobs: - { os: ubuntu-24.04, compiler: gcc-12 } - { os: macos-13, compiler: clang-14 } - { os: macos-14, compiler: clang-14 
} - - { os: windows-2022, compiler: msvc-1940 } + - { os: windows-2022, compiler: msvc-1939 } steps: - name: checkout uses: actions/checkout@v4 From 11060ccf024fe64628f7edfde2709abd282a393f Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 8 Aug 2024 17:14:13 +0300 Subject: [PATCH 09/97] Add msvc-19.40, alongside 19.39 --- .../config/windows-2022-msvc-1940/conan/profiles/default | 8 ++++++++ .github/workflows/build_test.yml | 2 ++ 2 files changed, 10 insertions(+) create mode 100644 .github/config/windows-2022-msvc-1940/conan/profiles/default diff --git a/.github/config/windows-2022-msvc-1940/conan/profiles/default b/.github/config/windows-2022-msvc-1940/conan/profiles/default new file mode 100644 index 00000000..cc618806 --- /dev/null +++ b/.github/config/windows-2022-msvc-1940/conan/profiles/default @@ -0,0 +1,8 @@ +[settings] +arch=x86_64 +build_type=Release +compiler=msvc +compiler.version=194 +compiler.cppstd=20 +compiler.runtime=dynamic +os=Windows diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index ed90d590..a3f0eb3c 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -27,6 +27,7 @@ jobs: - { os: macos-13, compiler: clang-14 } - { os: macos-14, compiler: clang-14 } - { os: windows-2022, compiler: msvc-1939 } + - { os: windows-2022, compiler: msvc-1940 } steps: - name: checkout uses: actions/checkout@v4 @@ -224,6 +225,7 @@ jobs: - { os: macos-13, compiler: clang-14 } - { os: macos-14, compiler: clang-14 } - { os: windows-2022, compiler: msvc-1939 } + - { os: windows-2022, compiler: msvc-1940 } steps: - name: checkout uses: actions/checkout@v4 From 51321cbd5a25d74ed1487110f7409e23bf42ce7d Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 8 Aug 2024 17:15:23 +0300 Subject: [PATCH 10/97] Raise min cppstd to 20 --- conanfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conanfile.py b/conanfile.py index f42c40bd..d2f1e0c7 100644 --- a/conanfile.py +++ 
b/conanfile.py @@ -42,7 +42,7 @@ def build_requirements(self): def validate_build(self): if self.settings.get_safe("compiler.cppstd"): - check_min_cppstd(self, 17) + check_min_cppstd(self, 20) def generate(self): tc = CMakeToolchain(self) From ce3fd9bcf018e068597f9a97095619cde2eca982 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 8 Aug 2024 17:40:06 +0300 Subject: [PATCH 11/97] "Fix" shared/fPIC build options. BUILD_SHARED_LIBS is already passed to CMake by CMakeToolchain --- CMakeLists.txt | 1 + conanfile.py | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b94ea785..6ddee702 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,7 @@ set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) +option(BUILD_SHARED_LIBS "Build using shared libraries" ON) option(ODR_TEST "enable tests" OFF) option(ODR_CLANG_TIDY "Run clang-tidy static analysis" OFF) diff --git a/conanfile.py b/conanfile.py index d2f1e0c7..521133cc 100644 --- a/conanfile.py +++ b/conanfile.py @@ -26,6 +26,14 @@ class OpenDocumentCoreConan(ConanFile): exports_sources = ["cli/*", "cmake/*", "src/*", "CMakeLists.txt"] + def config_options(self): + if self.settings.os == "Windows": + del self.options.fPIC + + def configure(self): + if self.options.shared: + self.options.rm_safe("fPIC") + def requirements(self): self.requires("pugixml/1.14") self.requires("cryptopp/8.8.0") @@ -47,7 +55,6 @@ def validate_build(self): def generate(self): tc = CMakeToolchain(self) tc.variables["CMAKE_PROJECT_VERSION"] = self.version - tc.variables["BUILD_SHARED_LIBS"] = self.options.shared tc.variables["ODR_TEST"] = False tc.generate() From 597955fa36c4b56e808c084d8fa611f970ca25ba Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 8 Aug 2024 18:18:07 +0300 Subject: [PATCH 12/97] Add with_pdf2htmlEX option. Attempt to default disable for Windows. 
May or may not work --- conanfile.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/conanfile.py b/conanfile.py index 521133cc..c291a776 100644 --- a/conanfile.py +++ b/conanfile.py @@ -18,10 +18,12 @@ class OpenDocumentCoreConan(ConanFile): options = { "shared": [True, False], "fPIC": [True, False], + "with_pdf2htmlEX": [True, False], } default_options = { "shared": False, "fPIC": True, + "with_pdf2htmlEX": True, } exports_sources = ["cli/*", "cmake/*", "src/*", "CMakeLists.txt"] @@ -29,6 +31,7 @@ class OpenDocumentCoreConan(ConanFile): def config_options(self): if self.settings.os == "Windows": del self.options.fPIC + self.default_options['with_pdf2htmlEX'] = False def configure(self): if self.options.shared: @@ -42,7 +45,7 @@ def requirements(self): self.requires("vincentlaucsb-csv-parser/2.1.3") self.requires("uchardet/0.0.7") self.requires("utfcpp/4.0.4") - if self.settings.os != "Windows": + if self.options.get_safe("with_pdf2htmlEX"): self.requires("pdf2htmlex/0.18.8.rc1-20240805-git") def build_requirements(self): From 2f0978da863cc387411ede73ed6afdcf119edf69 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 8 Aug 2024 18:24:25 +0300 Subject: [PATCH 13/97] Remove with_pdf2htmlEX option on Windows --- conanfile.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/conanfile.py b/conanfile.py index c291a776..a47e1e31 100644 --- a/conanfile.py +++ b/conanfile.py @@ -31,7 +31,9 @@ class OpenDocumentCoreConan(ConanFile): def config_options(self): if self.settings.os == "Windows": del self.options.fPIC - self.default_options['with_pdf2htmlEX'] = False + # @TODO: ideally Windows should just default_options['with_pdf2htmlEX'] = False + # But by the time config_options() is executed, default_options is already done parsed. 
+ del self.options.with_pdf2htmlEX def configure(self): if self.options.shared: From 982abb6b8e3356e999c1d12267f2d721264857a3 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 8 Aug 2024 18:33:41 +0300 Subject: [PATCH 14/97] Link against pdf2htmlEX in CMake --- CMakeLists.txt | 6 ++++++ conanfile.py | 1 + 2 files changed, 7 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ddee702..5b1167d3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,7 @@ set(CMAKE_CXX_EXTENSIONS OFF) option(BUILD_SHARED_LIBS "Build using shared libraries" ON) option(ODR_TEST "enable tests" OFF) option(ODR_CLANG_TIDY "Run clang-tidy static analysis" OFF) +option(WITH_PDF2HTMLEX "Build with pdf2htmlEX" ON) # TODO defining global compiler flags seems to be bad practice with conan # TODO consider using conan profiles @@ -188,6 +189,11 @@ target_link_libraries(odr utf8::cpp ) +if(WITH_PDF2HTMLEX) + find_package(pdf2htmlEX REQUIRED) + target_link_libraries(odr PRIVATE pdf2htmlex::pdf2htmlex) +endif(WITH_PDF2HTMLEX) + if (EXISTS "${PROJECT_SOURCE_DIR}/.git") add_dependencies(odr check_git) endif () diff --git a/conanfile.py b/conanfile.py index a47e1e31..e94801d5 100644 --- a/conanfile.py +++ b/conanfile.py @@ -61,6 +61,7 @@ def generate(self): tc = CMakeToolchain(self) tc.variables["CMAKE_PROJECT_VERSION"] = self.version tc.variables["ODR_TEST"] = False + tc.variables["WITH_PDF2HTMLEX"] = self.options.get_safe("with_pdf2htmlEX") tc.generate() deps = CMakeDeps(self) From 7302ce1ff825983bb6f4386919d81e7a07a3450b Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 8 Aug 2024 18:43:09 +0300 Subject: [PATCH 15/97] Pass WITH_PDF2HTMLEX parameter to CMake correctly --- conanfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conanfile.py b/conanfile.py index e94801d5..62fd9481 100644 --- a/conanfile.py +++ b/conanfile.py @@ -61,7 +61,7 @@ def generate(self): tc = CMakeToolchain(self) tc.variables["CMAKE_PROJECT_VERSION"] = self.version 
tc.variables["ODR_TEST"] = False - tc.variables["WITH_PDF2HTMLEX"] = self.options.get_safe("with_pdf2htmlEX") + tc.variables["WITH_PDF2HTMLEX"] = self.options.get_safe("with_pdf2htmlEX", False) tc.generate() deps = CMakeDeps(self) From 6fe7b1ca8c618fc1727f99ee08467df286df0655 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 15 Aug 2024 13:52:39 +0300 Subject: [PATCH 16/97] Add pdf2htmlEX_wrapper, actually call pdf2htmlEX --- CMakeLists.txt | 36 ++++++++++------ src/odr/html.cpp | 8 ++++ src/odr/internal/html/pdf2htmlEX_wrapper.cpp | 45 ++++++++++++++++++++ src/odr/internal/html/pdf2htmlEX_wrapper.hpp | 20 +++++++++ 4 files changed, 96 insertions(+), 13 deletions(-) create mode 100644 src/odr/internal/html/pdf2htmlEX_wrapper.cpp create mode 100644 src/odr/internal/html/pdf2htmlEX_wrapper.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b1167d3..1da59c27 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,7 +103,6 @@ set(ODR_SOURCE_FILES "src/odr/internal/html/filesystem.cpp" "src/odr/internal/html/html_writer.cpp" "src/odr/internal/html/image_file.cpp" - "src/odr/internal/html/pdf_file.cpp" "src/odr/internal/html/text_file.cpp" "src/odr/internal/json/json_file.cpp" @@ -137,19 +136,7 @@ set(ODR_SOURCE_FILES "src/odr/internal/ooxml/ooxml_meta.cpp" "src/odr/internal/ooxml/ooxml_util.cpp" - "src/odr/internal/pdf/pdf_cmap.cpp" - "src/odr/internal/pdf/pdf_cmap_parser.cpp" - "src/odr/internal/pdf/pdf_document.cpp" - "src/odr/internal/pdf/pdf_document_element.cpp" - "src/odr/internal/pdf/pdf_document_parser.cpp" "src/odr/internal/pdf/pdf_file.cpp" - "src/odr/internal/pdf/pdf_file_object.cpp" - "src/odr/internal/pdf/pdf_file_parser.cpp" - "src/odr/internal/pdf/pdf_graphics_operator.cpp" - "src/odr/internal/pdf/pdf_graphics_operator_parser.cpp" - "src/odr/internal/pdf/pdf_graphics_state.cpp" - "src/odr/internal/pdf/pdf_object.cpp" - "src/odr/internal/pdf/pdf_object_parser.cpp" "src/odr/internal/svm/svm_file.cpp" "src/odr/internal/svm/svm_format.cpp" @@ 
-171,6 +158,28 @@ set(ODR_SOURCE_FILES "src/odr/internal/zip/zip_file.cpp" "src/odr/internal/zip/zip_util.cpp" ) +if(WITH_PDF2HTMLEX) + LIST(APPEND ODR_SOURCE_FILES + "src/odr/internal/html/pdf2htmlEX_wrapper.cpp" + ) +else() + LIST(APPEND ODR_SOURCE_FILES + "src/odr/internal/html/pdf_file.cpp" + + "src/odr/internal/pdf/pdf_cmap.cpp" + "src/odr/internal/pdf/pdf_cmap_parser.cpp" + "src/odr/internal/pdf/pdf_document.cpp" + "src/odr/internal/pdf/pdf_document_element.cpp" + "src/odr/internal/pdf/pdf_document_parser.cpp" + "src/odr/internal/pdf/pdf_file_object.cpp" + "src/odr/internal/pdf/pdf_file_parser.cpp" + "src/odr/internal/pdf/pdf_graphics_operator.cpp" + "src/odr/internal/pdf/pdf_graphics_operator_parser.cpp" + "src/odr/internal/pdf/pdf_graphics_state.cpp" + "src/odr/internal/pdf/pdf_object.cpp" + "src/odr/internal/pdf/pdf_object_parser.cpp" + ) +endif(WITH_PDF2HTMLEX) add_library(odr ${ODR_SOURCE_FILES}) set_target_properties(odr PROPERTIES OUTPUT_NAME odr) @@ -190,6 +199,7 @@ target_link_libraries(odr ) if(WITH_PDF2HTMLEX) + target_compile_definitions(odr PRIVATE "WITH_PDF2HTMLEX=1") find_package(pdf2htmlEX REQUIRED) target_link_libraries(odr PRIVATE pdf2htmlex::pdf2htmlex) endif(WITH_PDF2HTMLEX) diff --git a/src/odr/html.cpp b/src/odr/html.cpp index 9a7fd62b..e9be87fe 100644 --- a/src/odr/html.cpp +++ b/src/odr/html.cpp @@ -9,7 +9,11 @@ #include #include #include +#if defined(WITH_PDF2HTMLEX) +#include +#else #include +#endif #include #include @@ -113,7 +117,11 @@ Html html::translate(const Document &document, const std::string &output_path, Html html::translate(const PdfFile &pdf_file, const std::string &output_path, const HtmlConfig &config) { fs::create_directories(output_path); +#if defined(WITH_PDF2HTMLEX) + return internal::html::pdf2htmlEX_wrapper(pdf_file, output_path, config); +#else return internal::html::translate_pdf_file(pdf_file, output_path, config); +#endif } void html::edit(const Document &document, const char *diff) { diff --git 
a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp new file mode 100644 index 00000000..f1f4ff63 --- /dev/null +++ b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp @@ -0,0 +1,45 @@ +#include + +#include +#include +#include + +#include + + +#include + +#include + +namespace odr::internal { + +Html html::pdf2htmlEX_wrapper(const PdfFile &pdf_file, + const std::string &output_path, + const HtmlConfig &config) { + pdf2htmlEX::pdf2htmlEX pdf2htmlEX; + + auto disk_path = pdf_file.file().disk_path(); + if (!disk_path.has_value()) { + throw FileNotFound(); + } + pdf2htmlEX.setInputFilename(disk_path.value()); + pdf2htmlEX.setDestinationDir(output_path); + auto output_file_name = "document.html"; + pdf2htmlEX.setOutputFilename(output_file_name); + + try { + pdf2htmlEX.convert(); + } catch (const pdf2htmlEX::EncryptionPasswordException & e) { + throw WrongPassword(); + } catch (const pdf2htmlEX::DocumentCopyProtectedException & e) { + throw std::runtime_error("document is copy protected"); + } catch (const pdf2htmlEX::ConversionFailedException & e) { + throw std::runtime_error(std::string("conversion error ") + e.what()); + } + + return {FileType::portable_document_format, + config, + {{"document", output_path + "/" + output_file_name}}}; +} + +} // namespace odr::internal diff --git a/src/odr/internal/html/pdf2htmlEX_wrapper.hpp b/src/odr/internal/html/pdf2htmlEX_wrapper.hpp new file mode 100644 index 00000000..b577af86 --- /dev/null +++ b/src/odr/internal/html/pdf2htmlEX_wrapper.hpp @@ -0,0 +1,20 @@ +#ifndef ODR_INTERNAL_PDF2HTMLEX_WRAPPER_HPP +#define ODR_INTERNAL_PDF2HTMLEX_WRAPPER_HPP + +#include + +namespace odr { +class PdfFile; + +struct HtmlConfig; +class Html; +} // namespace odr + +namespace odr::internal::html { + +Html pdf2htmlEX_wrapper(const PdfFile &pdf_file, const std::string &output_path, + const HtmlConfig &config); + +} + +#endif // ODR_INTERNAL_PDF2HTMLEX_WRAPPER_HPP From c4475b7bef07dc0329c5eac2b7b75f527da60dda 
Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 15 Aug 2024 13:57:57 +0300 Subject: [PATCH 17/97] Format pdf2htmlEX_wrapper.cpp --- src/odr/internal/html/pdf2htmlEX_wrapper.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp index f1f4ff63..d02b09da 100644 --- a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp +++ b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp @@ -6,7 +6,6 @@ #include - #include #include @@ -29,11 +28,11 @@ Html html::pdf2htmlEX_wrapper(const PdfFile &pdf_file, try { pdf2htmlEX.convert(); - } catch (const pdf2htmlEX::EncryptionPasswordException & e) { + } catch (const pdf2htmlEX::EncryptionPasswordException &e) { throw WrongPassword(); - } catch (const pdf2htmlEX::DocumentCopyProtectedException & e) { + } catch (const pdf2htmlEX::DocumentCopyProtectedException &e) { throw std::runtime_error("document is copy protected"); - } catch (const pdf2htmlEX::ConversionFailedException & e) { + } catch (const pdf2htmlEX::ConversionFailedException &e) { throw std::runtime_error(std::string("conversion error ") + e.what()); } From 5c37f205c5a9dc21b8fa31de4a112ca49f7ec9b2 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 15 Aug 2024 14:07:06 +0300 Subject: [PATCH 18/97] Upgrade pdf2htmlEX to 20240814-git --- conanfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conanfile.py b/conanfile.py index afab4ceb..630498a4 100644 --- a/conanfile.py +++ b/conanfile.py @@ -36,7 +36,7 @@ def requirements(self): self.requires("uchardet/0.0.7") self.requires("utfcpp/4.0.4") if self.options.get_safe("with_pdf2htmlEX"): - self.requires("pdf2htmlex/0.18.8.rc1-20240805-git") + self.requires("pdf2htmlex/0.18.8.rc1-20240814-git") def build_requirements(self): self.test_requires("gtest/1.14.0") From 581767dd0362ffce23950e7c5a5be19dd91229f0 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 15 Aug 2024 14:23:09 +0300 
Subject: [PATCH 19/97] Compile with original pdf files, even if they aren't used at runtime. They're needed for tests --- CMakeLists.txt | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1da59c27..ea6d7e97 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,6 +103,7 @@ set(ODR_SOURCE_FILES "src/odr/internal/html/filesystem.cpp" "src/odr/internal/html/html_writer.cpp" "src/odr/internal/html/image_file.cpp" + "src/odr/internal/html/pdf_file.cpp" "src/odr/internal/html/text_file.cpp" "src/odr/internal/json/json_file.cpp" @@ -136,7 +137,19 @@ set(ODR_SOURCE_FILES "src/odr/internal/ooxml/ooxml_meta.cpp" "src/odr/internal/ooxml/ooxml_util.cpp" + "src/odr/internal/pdf/pdf_cmap.cpp" + "src/odr/internal/pdf/pdf_cmap_parser.cpp" + "src/odr/internal/pdf/pdf_document.cpp" + "src/odr/internal/pdf/pdf_document_element.cpp" + "src/odr/internal/pdf/pdf_document_parser.cpp" "src/odr/internal/pdf/pdf_file.cpp" + "src/odr/internal/pdf/pdf_file_object.cpp" + "src/odr/internal/pdf/pdf_file_parser.cpp" + "src/odr/internal/pdf/pdf_graphics_operator.cpp" + "src/odr/internal/pdf/pdf_graphics_operator_parser.cpp" + "src/odr/internal/pdf/pdf_graphics_state.cpp" + "src/odr/internal/pdf/pdf_object.cpp" + "src/odr/internal/pdf/pdf_object_parser.cpp" "src/odr/internal/svm/svm_file.cpp" "src/odr/internal/svm/svm_format.cpp" @@ -162,23 +175,6 @@ if(WITH_PDF2HTMLEX) LIST(APPEND ODR_SOURCE_FILES "src/odr/internal/html/pdf2htmlEX_wrapper.cpp" ) -else() - LIST(APPEND ODR_SOURCE_FILES - "src/odr/internal/html/pdf_file.cpp" - - "src/odr/internal/pdf/pdf_cmap.cpp" - "src/odr/internal/pdf/pdf_cmap_parser.cpp" - "src/odr/internal/pdf/pdf_document.cpp" - "src/odr/internal/pdf/pdf_document_element.cpp" - "src/odr/internal/pdf/pdf_document_parser.cpp" - "src/odr/internal/pdf/pdf_file_object.cpp" - "src/odr/internal/pdf/pdf_file_parser.cpp" - "src/odr/internal/pdf/pdf_graphics_operator.cpp" - 
"src/odr/internal/pdf/pdf_graphics_operator_parser.cpp" - "src/odr/internal/pdf/pdf_graphics_state.cpp" - "src/odr/internal/pdf/pdf_object.cpp" - "src/odr/internal/pdf/pdf_object_parser.cpp" - ) endif(WITH_PDF2HTMLEX) add_library(odr ${ODR_SOURCE_FILES}) From cc091d991f085e851581c108e126893e366df6de Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 15 Aug 2024 17:46:14 +0300 Subject: [PATCH 20/97] Process annotation, don't process outline. Ignore drm. DRM is for copying, this is for reading --- src/odr/internal/html/pdf2htmlEX_wrapper.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp index d02b09da..2c3ee948 100644 --- a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp +++ b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp @@ -6,8 +6,6 @@ #include -#include - #include namespace odr::internal { @@ -26,6 +24,15 @@ Html html::pdf2htmlEX_wrapper(const PdfFile &pdf_file, auto output_file_name = "document.html"; pdf2htmlEX.setOutputFilename(output_file_name); + pdf2htmlEX.setDRM(false); + pdf2htmlEX.setProcessOutline(false); + pdf2htmlEX.setProcessAnnotation(true); + + // @TODO: +// if (options.password != null) { +// pdf2htmlEX.setOwnerPassword(options.password).setUserPassword(options.password); +// } + try { pdf2htmlEX.convert(); } catch (const pdf2htmlEX::EncryptionPasswordException &e) { From 71622a0db6a96774512fb13888f83117920875cf Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 15 Aug 2024 17:47:26 +0300 Subject: [PATCH 21/97] formatting --- src/odr/internal/html/pdf2htmlEX_wrapper.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp index 2c3ee948..30fc7fa0 100644 --- a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp +++ b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp @@ -29,9 +29,9 @@ Html 
html::pdf2htmlEX_wrapper(const PdfFile &pdf_file, pdf2htmlEX.setProcessAnnotation(true); // @TODO: -// if (options.password != null) { -// pdf2htmlEX.setOwnerPassword(options.password).setUserPassword(options.password); -// } + // if (options.password != null) { + // pdf2htmlEX.setOwnerPassword(options.password).setUserPassword(options.password); + // } try { pdf2htmlEX.convert(); From 5ff2c619fb8e66f2f64ee58365755be3e137c3ed Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Fri, 16 Aug 2024 14:17:55 +0300 Subject: [PATCH 22/97] Fix macos-14 pdf2htmlEX conan profile --- .../conan/profiles/pdf2htmlEX-config | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/config/macos-14-armv8-clang-14/conan/profiles/pdf2htmlEX-config b/.github/config/macos-14-armv8-clang-14/conan/profiles/pdf2htmlEX-config index e69de29b..10f7c1f9 100644 --- a/.github/config/macos-14-armv8-clang-14/conan/profiles/pdf2htmlEX-config +++ b/.github/config/macos-14-armv8-clang-14/conan/profiles/pdf2htmlEX-config @@ -0,0 +1,6 @@ +[options] +# @TODO: Fix linker errors caused by the absense of these options +# Fontforge build failure if GLib built with mount +glib/*:with_mount=False +# Fontforge build failure if FreeType built with Brotli +freetype/*:with_brotli=False From b7120b94c7fd5357644afd70457f4c877d41f7af Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Fri, 16 Aug 2024 19:25:54 +0300 Subject: [PATCH 23/97] Remove pdf2htmlEX-config. 
Default options should work now --- .github/config/macos-13-clang-14/conan/profiles/default | 2 -- .../macos-13-clang-14/conan/profiles/pdf2htmlEX-config | 6 ------ .../config/macos-14-armv8-clang-14/conan/profiles/default | 2 -- .../conan/profiles/pdf2htmlEX-config | 6 ------ .github/config/ubuntu-24.04-clang-18/conan/profiles/default | 2 -- .../ubuntu-24.04-clang-18/conan/profiles/pdf2htmlEX-config | 6 ------ .github/config/ubuntu-24.04-gcc-14/conan/profiles/default | 2 -- .../ubuntu-24.04-gcc-14/conan/profiles/pdf2htmlEX-config | 6 ------ .../config/windows-2022-msvc-1940/conan/profiles/default | 2 -- .../windows-2022-msvc-1940/conan/profiles/pdf2htmlEX-config | 6 ------ 10 files changed, 40 deletions(-) delete mode 100644 .github/config/macos-13-clang-14/conan/profiles/pdf2htmlEX-config delete mode 100644 .github/config/macos-14-armv8-clang-14/conan/profiles/pdf2htmlEX-config delete mode 100644 .github/config/ubuntu-24.04-clang-18/conan/profiles/pdf2htmlEX-config delete mode 100644 .github/config/ubuntu-24.04-gcc-14/conan/profiles/pdf2htmlEX-config delete mode 100644 .github/config/windows-2022-msvc-1940/conan/profiles/pdf2htmlEX-config diff --git a/.github/config/macos-13-clang-14/conan/profiles/default b/.github/config/macos-13-clang-14/conan/profiles/default index e6de4e9d..918a142a 100644 --- a/.github/config/macos-13-clang-14/conan/profiles/default +++ b/.github/config/macos-13-clang-14/conan/profiles/default @@ -1,5 +1,3 @@ -include(pdf2htmlEX-config) - [settings] arch=x86_64 build_type=RelWithDebInfo diff --git a/.github/config/macos-13-clang-14/conan/profiles/pdf2htmlEX-config b/.github/config/macos-13-clang-14/conan/profiles/pdf2htmlEX-config deleted file mode 100644 index 10f7c1f9..00000000 --- a/.github/config/macos-13-clang-14/conan/profiles/pdf2htmlEX-config +++ /dev/null @@ -1,6 +0,0 @@ -[options] -# @TODO: Fix linker errors caused by the absense of these options -# Fontforge build failure if GLib built with mount -glib/*:with_mount=False -# 
Fontforge build failure if FreeType built with Brotli -freetype/*:with_brotli=False diff --git a/.github/config/macos-14-armv8-clang-14/conan/profiles/default b/.github/config/macos-14-armv8-clang-14/conan/profiles/default index 7956113d..59c10a7c 100644 --- a/.github/config/macos-14-armv8-clang-14/conan/profiles/default +++ b/.github/config/macos-14-armv8-clang-14/conan/profiles/default @@ -1,5 +1,3 @@ -include(pdf2htmlEX-config) - [settings] arch=armv8 build_type=RelWithDebInfo diff --git a/.github/config/macos-14-armv8-clang-14/conan/profiles/pdf2htmlEX-config b/.github/config/macos-14-armv8-clang-14/conan/profiles/pdf2htmlEX-config deleted file mode 100644 index 10f7c1f9..00000000 --- a/.github/config/macos-14-armv8-clang-14/conan/profiles/pdf2htmlEX-config +++ /dev/null @@ -1,6 +0,0 @@ -[options] -# @TODO: Fix linker errors caused by the absense of these options -# Fontforge build failure if GLib built with mount -glib/*:with_mount=False -# Fontforge build failure if FreeType built with Brotli -freetype/*:with_brotli=False diff --git a/.github/config/ubuntu-24.04-clang-18/conan/profiles/default b/.github/config/ubuntu-24.04-clang-18/conan/profiles/default index a709f923..594120e8 100644 --- a/.github/config/ubuntu-24.04-clang-18/conan/profiles/default +++ b/.github/config/ubuntu-24.04-clang-18/conan/profiles/default @@ -1,5 +1,3 @@ -include(pdf2htmlEX-config) - [settings] arch=x86_64 build_type=RelWithDebInfo diff --git a/.github/config/ubuntu-24.04-clang-18/conan/profiles/pdf2htmlEX-config b/.github/config/ubuntu-24.04-clang-18/conan/profiles/pdf2htmlEX-config deleted file mode 100644 index 10f7c1f9..00000000 --- a/.github/config/ubuntu-24.04-clang-18/conan/profiles/pdf2htmlEX-config +++ /dev/null @@ -1,6 +0,0 @@ -[options] -# @TODO: Fix linker errors caused by the absense of these options -# Fontforge build failure if GLib built with mount -glib/*:with_mount=False -# Fontforge build failure if FreeType built with Brotli -freetype/*:with_brotli=False diff 
--git a/.github/config/ubuntu-24.04-gcc-14/conan/profiles/default b/.github/config/ubuntu-24.04-gcc-14/conan/profiles/default index 4992a4cb..c639209e 100644 --- a/.github/config/ubuntu-24.04-gcc-14/conan/profiles/default +++ b/.github/config/ubuntu-24.04-gcc-14/conan/profiles/default @@ -1,5 +1,3 @@ -include(pdf2htmlEX-config) - [settings] arch=x86_64 build_type=RelWithDebInfo diff --git a/.github/config/ubuntu-24.04-gcc-14/conan/profiles/pdf2htmlEX-config b/.github/config/ubuntu-24.04-gcc-14/conan/profiles/pdf2htmlEX-config deleted file mode 100644 index 10f7c1f9..00000000 --- a/.github/config/ubuntu-24.04-gcc-14/conan/profiles/pdf2htmlEX-config +++ /dev/null @@ -1,6 +0,0 @@ -[options] -# @TODO: Fix linker errors caused by the absense of these options -# Fontforge build failure if GLib built with mount -glib/*:with_mount=False -# Fontforge build failure if FreeType built with Brotli -freetype/*:with_brotli=False diff --git a/.github/config/windows-2022-msvc-1940/conan/profiles/default b/.github/config/windows-2022-msvc-1940/conan/profiles/default index 0283073c..cc618806 100644 --- a/.github/config/windows-2022-msvc-1940/conan/profiles/default +++ b/.github/config/windows-2022-msvc-1940/conan/profiles/default @@ -1,5 +1,3 @@ -include(pdf2htmlEX-config) - [settings] arch=x86_64 build_type=Release diff --git a/.github/config/windows-2022-msvc-1940/conan/profiles/pdf2htmlEX-config b/.github/config/windows-2022-msvc-1940/conan/profiles/pdf2htmlEX-config deleted file mode 100644 index 10f7c1f9..00000000 --- a/.github/config/windows-2022-msvc-1940/conan/profiles/pdf2htmlEX-config +++ /dev/null @@ -1,6 +0,0 @@ -[options] -# @TODO: Fix linker errors caused by the absense of these options -# Fontforge build failure if GLib built with mount -glib/*:with_mount=False -# Fontforge build failure if FreeType built with Brotli -freetype/*:with_brotli=False From 4e65796bf909e01b65f49fbfeda67195f6b2edfc Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 22 Aug 2024 
00:01:14 +0300 Subject: [PATCH 24/97] Build with wvWare --- CMakeLists.txt | 11 + conanfile.py | 6 + src/wvWare.c | 1897 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1914 insertions(+) create mode 100644 src/wvWare.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 2527d3e3..ba686b02 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,7 @@ option(ODR_TEST "enable tests" OFF) option(ODR_CLI "enable command line interface" ON) option(ODR_CLANG_TIDY "Run clang-tidy static analysis" OFF) option(WITH_PDF2HTMLEX "Build with pdf2htmlEX" ON) +option(WITH_WVWARE "Build with wvWare" ON) # TODO defining global compiler flags seems to be bad practice with conan # TODO consider using conan profiles @@ -178,6 +179,11 @@ if(WITH_PDF2HTMLEX) "src/odr/internal/html/pdf2htmlEX_wrapper.cpp" ) endif(WITH_PDF2HTMLEX) +if(WITH_WVWARE) + LIST(APPEND ODR_SOURCE_FILES + "src/wvWare.c" + ) +endif(WITH_WVWARE) add_library(odr ${ODR_SOURCE_FILES}) set_target_properties(odr PROPERTIES OUTPUT_NAME odr) @@ -201,6 +207,11 @@ if(WITH_PDF2HTMLEX) find_package(pdf2htmlEX REQUIRED) target_link_libraries(odr PRIVATE pdf2htmlex::pdf2htmlex) endif(WITH_PDF2HTMLEX) +if(WITH_WVWARE) + target_compile_definitions(odr PRIVATE "WITH_WVWARE=1") + find_package(wvware REQUIRED) + target_link_libraries(odr PRIVATE wvware::wvware) +endif(WITH_WVWARE) if (EXISTS "${PROJECT_SOURCE_DIR}/.git") add_dependencies(odr check_git) diff --git a/conanfile.py b/conanfile.py index 2946772e..ea7f5d3c 100644 --- a/conanfile.py +++ b/conanfile.py @@ -20,11 +20,13 @@ class OpenDocumentCoreConan(ConanFile): "shared": [True, False], "fPIC": [True, False], "with_pdf2htmlEX": [True, False], + "with_wvWare": [True, False], } default_options = { "shared": False, "fPIC": True, "with_pdf2htmlEX": True, + "with_wvWare": True, } def requirements(self): @@ -37,6 +39,8 @@ def requirements(self): self.requires("utfcpp/4.0.4") if self.options.get_safe("with_pdf2htmlEX"): 
self.requires("pdf2htmlex/0.18.8.rc1-20240814-git") + if self.options.get_safe("with_wvWare"): + self.requires("wvware/1.2.9") def build_requirements(self): self.test_requires("gtest/1.14.0") @@ -53,6 +57,7 @@ def config_options(self): # @TODO: ideally Windows should just default_options['with_pdf2htmlEX'] = False # But by the time config_options() is executed, default_options is already done parsed. del self.options.with_pdf2htmlEX + del self.options.with_wvWare def configure(self): if self.options.shared: @@ -63,6 +68,7 @@ def generate(self): tc.variables["CMAKE_PROJECT_VERSION"] = self.version tc.variables["ODR_TEST"] = False tc.variables["WITH_PDF2HTMLEX"] = self.options.get_safe("with_pdf2htmlEX", False) + tc.variables["WITH_WVWARE"] = self.options.get_safe("with_wvWare", False) tc.generate() deps = CMakeDeps(self) diff --git a/src/wvWare.c b/src/wvWare.c new file mode 100644 index 00000000..a13d13b5 --- /dev/null +++ b/src/wvWare.c @@ -0,0 +1,1897 @@ +/* wvWare + * Copyright (C) Caolan McNamara, Dom Lachowicz, and others + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + * 02111-1307, USA. 
+ */ + +//#ifdef HAVE_CONFIG_H +#include +//#endif + +#include +#include +#include +#include +#include +#include +#include "getopt.h" +#include "errorPrinter.h" + +/* strdup isn't declared in for `gcc -ansi'; declare it here */ +extern char *strdup (const char *); + +extern char *str_copy(char *d, size_t n, char *s); +extern char *str_append(char *d, size_t n, char *s); + +extern char *s_WVDATADIR; +extern char *s_HTMLCONFIG; +extern int documentId; +#define static_reinit( variable, defaultValue ) { \ + static int staticVarValue = 0; \ + if (staticVarValue != documentId) { \ + variable = defaultValue; \ + staticVarValue = documentId; \ + } \ +} + +extern char * strdup_and_append_twice(const char * a, const char * b, const char * c); + +/* +Released under GPL, written by Caolan.McNamara@ul.ie. + +Copyright (C) 1998,1999 + Caolan McNamara + +Real Life: Caolan McNamara * Doing: MSc in HCI +Work: Caolan.McNamara@ul.ie * Phone: +353-86-8790257 +URL: http://skynet.csn.ul.ie/~caolan * Sig: an oblique strategy +How would you have done it? 
+*/ + +/* +returns 1 for not an ole doc +2 ole but not word doc +-1 for an error of some unknown kind +0 on success +*/ + +char *config = "wvHtml.xml"; + +/* flags for -X / --xml option */ +int xml_output = 0; +extern char *xml_slash; + +/* flag for disabling graphics */ +int no_graphics = 0; + +int myelehandler (wvParseStruct * ps, wvTag tag, void *props, int dirty); +int mydochandler (wvParseStruct * ps, wvTag tag); +int myCharProc (wvParseStruct * ps, U16 eachchar, U8 chartype, U16 lid); +int mySpecCharProc (wvParseStruct * ps, U16 eachchar, CHP * achp); + +int wvOpenConfig (state_data *myhandle,char *config); + +char * wv_arg_basename = 0; +char * figure_name (wvParseStruct * ps); +char * name_to_url (char * name); + +char wv_cwd[4097]; + +int HandleBitmap (wvParseStruct * ps, char *name, BitmapBlip * bitmap); +int HandleMetafile (wvParseStruct * ps, char *name, MetaFileBlip * bitmap); + +/* should really be a config.h decl for having strdup, but... */ +#ifdef __MWERKS__ +char * +strdup (const char *text) +{ + char *buf; + size_t len; + + len = strlen (text) + 1; + buf = (char *) wvMalloc (len); + memcpy (buf, text, len); + + return buf; +} + + +#endif + +char * +wvHtmlGraphic (wvParseStruct * ps, Blip * blip) +{ + char *name; + wvStream * fd; + char test[3]; + + name = figure_name (ps); + if (name == 0) return (0); + + /* + temp hack to test older included bmps in word 6 and 7, + should be wrapped in a modern escher strucure before getting + to here, and then handled as normal + */ + wvTrace (("type is %d\n", blip->type)); + switch (blip->type) + { + case msoblipJPEG: + case msoblipDIB: + case msoblipPNG: + fd = (blip->blip.bitmap.m_pvBits); + test[2] = '\0'; + test[0] = read_8ubit (fd); + + test[1] = read_8ubit (fd); + wvStream_rewind (fd); + if (!(strcmp (test, "BM"))) + { + wvAppendStr (&name, ".bmp"); + if (0 != HandleBitmap (ps, name, &blip->blip.bitmap)) + return (NULL); + return (name); + } + default: + break; + } + + switch (blip->type) + { + case 
msoblipWMF: + wvAppendStr (&name, ".wmf"); + if (0 != HandleMetafile (ps, name, &blip->blip.metafile)) + return (NULL); + break; + case msoblipEMF: + wvAppendStr (&name, ".emf"); + if (0 != HandleMetafile (ps, name, &blip->blip.metafile)) + return (NULL); + break; + case msoblipPICT: + wvAppendStr (&name, ".pict"); + if (0 != HandleMetafile (ps, name, &blip->blip.metafile)) + return (NULL); + break; + case msoblipJPEG: + wvAppendStr (&name, ".jpg"); + if (0 != HandleBitmap (ps, name, &blip->blip.bitmap)) + return (NULL); + break; + case msoblipDIB: + wvAppendStr (&name, ".dib"); + if (0 != HandleBitmap (ps, name, &blip->blip.bitmap)) + return (NULL); + break; + case msoblipPNG: + wvAppendStr (&name, ".png"); + if (0 != HandleBitmap (ps, name, &blip->blip.bitmap)) + return (NULL); + break; + } + return (name); +} + + +int +HandleBitmap (wvParseStruct * ps, char *name, BitmapBlip * bitmap) +{ + wvStream * pwv = bitmap->m_pvBits; + FILE *fd = NULL; + size_t size = 0, i; + + if (ps->dir) chdir (ps->dir); + fd = fopen (name, "wb"); + if (ps->dir) chdir (wv_cwd); + if (fd == NULL) + { + fprintf (stderr,"\nCannot open %s for writing\n",name); + exit (1); + } + size = wvStream_size (pwv); + wvStream_rewind(pwv); + + for (i = 0; i < size; i++) + fputc (read_8ubit(pwv), fd); + fclose (fd); + wvTrace (("Name is %s\n", name)); + return (0); +} + + +int +HandleMetafile (wvParseStruct * ps, char *name, MetaFileBlip * bitmap) +{ + wvStream * pwv = bitmap->m_pvBits; + FILE *fd = NULL; + size_t size = 0, i; + U8 decompressf = 0; + + if (ps->dir) chdir (ps->dir); + fd = fopen (name, "wb"); + if (ps->dir) chdir (wv_cwd); + if (fd == NULL) + { + fprintf (stderr,"\nCannot open %s for writing\n",name); + exit (1); + } + size = wvStream_size (pwv); + wvStream_rewind(pwv); + + if (bitmap->m_fCompression == msocompressionDeflate) + decompressf = setdecom (); + + if ( !decompressf) + { + for (i = 0; i < size; i++) + fputc (read_8ubit(pwv), fd); + } + else /* decompress here */ + { + FILE 
*tmp = tmpfile (); + FILE *out = tmpfile (); + + for (i = 0; i < size; i++) + fputc (read_8ubit(pwv), tmp); + + rewind (tmp); + decompress (tmp, out, bitmap->m_cbSave, bitmap->m_cb); + fclose (tmp); + + rewind(out); + + for (i = 0; i < bitmap->m_cb; i++) + fputc ( fgetc(out), fd); + + fclose(out); + + } + + fclose (fd); + wvTrace (("Name is %s\n", name)); + return (0); +} + +static void +do_version (void) +{ + /* todo: initialize this in a configure script */ + printf ("wvWare %s\n", VERSION); +} + +static void +do_help (void) +{ + do_version (); + printf ("Usage: wvWare [OPTION...] filename.doc\n"); + printf ("\nCommon Options:\n"); + printf (" -x --config=config.xml\tSpecify an output filter to use\n"); + printf (" -c --charset=charset\t\tSpecify an iconv charset encoding\n"); + printf (" -p --password=password\tSpecify password for encrypted\n\t\t\t\tWord Documents\n"); + printf (" -d --dir=dir\t\t\tDIR\n"); + printf (" -b --basename=name\t\tUse name as base name of image files\n"); + printf (" -a --auto-eps=fmt\t\tQuery support for conversion of fmt to eps\n"); + printf (" -s --suppress=fmt\t\tDon't convert fmt to eps\n"); + printf (" -X --xml\t\t\tXML output\n"); + printf (" -1 --nographics\t\tno 0x01 graphics output\n"); + printf (" -v --version\t\t\tPrint wvWare's version number\n"); + printf (" -? --help\t\t\tPrint this help message\n"); + printf + ("\nwvWare is a suite of applications that converts Microsoft Word Documents\n"); + printf + ("(versions 2,5,6,7,8,9) into more \"useful\" formats such as HTML, LaTeX,\n"); + printf + ("ABW, WML, Text, etc... 
wvWare is also a library which can be used by\n"); + printf + ("other applications to import (and soon export) Word documents.\n\n"); + printf ("Authors:\nDom Lachowicz (dominicl@seas.upenn.edu)\n"); + printf ("Caolan McNamara (original author)\nVisit http://www.wvware.com\n"); +} + +static void wv_query_eps (const char* format); +static void wv_suppress (const char* format); + +char *charset = "utf-8"; + +#if 0 +int +main (int argc, char **argv) +{ + FILE *input; + char *password = NULL; + char *dir = NULL; + int ret; + state_data myhandle; + expand_data expandhandle; + wvParseStruct ps; + int c, index = 0; + static struct option long_options[] = { + {"charset", 1, 0, 'c'}, + {"config", 1, 0, 'x'}, + {"password", 1, 0, 'p'}, + {"dir", 1, 0, 'd'}, + {"basename", 1, 0, 'b'}, + {"auto-eps", 1, 0, 'a'}, + {"suppress", 1, 0, 's'}, + {"version", 0, 0, 'v'}, + {"help", 0, 0, '?'}, + {"xml", 0, 0, 'X'}, + {"nographics", 0, 0, '1'}, + {0, 0, 0, 0} + }; + + if (argc < 2) + { + do_help (); + exit (-1); + } + + while (1) + { + c = getopt_long (argc, argv, "?vc:x:p:d:b:a:s:X1", long_options, &index); + if (c == -1) + break; + switch (c) + { + case '?': + do_help (); + return 0; + case 'v': + do_version (); + return 0; + case 'c': + if (optarg) + charset = optarg; + else + wvError (("No argument given to charset")); + break; + case 'x': + if (optarg) + config = optarg; + else + wvError (("No config file given to config option")); + break; + case 'p': + if (optarg) + password = optarg; + else + wvError (("No password given to password option")); + break; + case 'd': + if (optarg) + dir = optarg; + else + wvError (("No directory given to dir option")); + break; + case 'b': + if (optarg) + wv_arg_basename = optarg; + else + wvError (("No name given to basename option")); + break; + case 'a': + wv_query_eps (optarg); + return 0; + case 's': + wv_suppress (optarg); + break; + + case 'X': + config = "wvXml.xml"; + charset = "utf-8"; + xml_output = 1; + xml_slash = " /"; + break; + + 
case '1': + no_graphics = 1; + break; + + default: + do_help (); + return -1; + } + } + + if (optind >= argc) + { + fprintf (stderr, "No file name given to open\n"); + return (-1); + } + +#if 0 + input = fopen (argv[optind], "rb"); + if (!input) + { + fprintf (stderr, "Failed to open %s\n", argv[optind]); + return (-1); + } + fclose (input); +#endif + + getcwd (wv_cwd,4096); + wv_cwd[4096] = 0; + + wvInit (); + ret = wvInitParser (&ps, argv[optind]); + ps.filename = argv[optind]; + ps.dir = dir; + + if (ret & 0x8000) /* Password protected? */ + { + if ((ret & 0x7fff) == WORD8) + { + ret = 0; + if (password == NULL) + { + fprintf (stderr, + "Password required, this is an encrypted document\n"); + return (-1); + } + else + { + wvSetPassword (password, &ps); + if (wvDecrypt97 (&ps)) + { + wvError (("Incorrect Password\n")); + return (-1); + } + } + } + else if (((ret & 0x7fff) == WORD7) || ((ret & 0x7fff) == WORD6)) + { + ret = 0; + if (password == NULL) + { + fprintf (stderr, + "Password required, this is an encrypted document\n"); + return (-1); + } + else + { + wvSetPassword (password, &ps); + if (wvDecrypt95 (&ps)) + { + wvError (("Incorrect Password\n")); + return (-1); + } + } + } + } + + if (ret) + { + wvError (("startup error #%d\n", ret)); + wvOLEFree (&ps); + return (-1); + } + + wvSetElementHandler (&ps, myelehandler); + wvSetDocumentHandler (&ps, mydochandler); + wvSetCharHandler (&ps, myCharProc); + wvSetSpecialCharHandler (&ps, mySpecCharProc); + + wvInitStateData (&myhandle); + + if (wvOpenConfig (&myhandle,config) == 0) + { + wvError (("config file not found\n")); + return (-1); + } + else + { + wvTrace (("x for FILE is %x\n", myhandle.fp)); + ret = wvParseConfig (&myhandle); + } + + if (!ret) + { + expandhandle.sd = &myhandle; + ps.userData = &expandhandle; + ret = wvHtml (&ps); + } + wvReleaseStateData (&myhandle); + + if (ret == 2) + return (2); + else if (ret != 0) + ret = -1; + wvOLEFree (&ps); + wvShutdown (); + + return (ret); +} +#endif + +int 
convert(char *inputFile, char *outputDir, const char *password) { + int ret; + state_data myhandle; + expand_data expandhandle; + wvParseStruct ps; + + config = "wvHtml.xml"; + + getcwd (wv_cwd,4096); + wv_cwd[4096] = 0; + + wvInit (); + ret = wvInitParser (&ps, inputFile); + ps.dir = outputDir; + + if (ret & 0x8000) /* Password protected? */ + { + if ((ret & 0x7fff) == WORD8) + { + ret = 0; + if (password == NULL || password[0] == '\0') + { + fprintf (stderr, + "Password required, this is an encrypted document\n"); + return 100; + } + else + { + wvSetPassword (password, &ps); + if (wvDecrypt97 (&ps)) + { + wvError (("Incorrect Password\n")); + return 101; + } + } + } + else if (((ret & 0x7fff) == WORD7) || ((ret & 0x7fff) == WORD6)) + { + ret = 0; + if (password == NULL || password[0] == '\0') + { + fprintf (stderr, + "Password required, this is an encrypted document\n"); + return 100; + } + else + { + wvSetPassword (password, &ps); + if (wvDecrypt95 (&ps)) + { + wvError (("Incorrect Password\n")); + return 101; + } + } + } + } + + if (ret) + { + wvError (("startup error #%d\n", ret)); + wvOLEFree (&ps); + return (-1); + } + + wvSetElementHandler (&ps, myelehandler); + wvSetDocumentHandler (&ps, mydochandler); + wvSetCharHandler (&ps, myCharProc); + wvSetSpecialCharHandler (&ps, mySpecCharProc); + + wvInitStateData (&myhandle); + + if (wvOpenConfig (&myhandle,config) == 0) + { + wvError (("config file not found\n")); + return (-1); + } + else + { + wvTrace (("x for FILE is %x\n", myhandle.fp)); + ret = wvParseConfig (&myhandle); + } + + if (!ret) + { + expandhandle.sd = &myhandle; + ps.userData = &expandhandle; + ret = wvHtml (&ps); + } + wvReleaseStateData (&myhandle); + + if (ret == 2) + return (2); + else if (ret != 0) + ret = -1; + wvOLEFree (&ps); + wvShutdown (); + + return (ret); +} + +int +myelehandler (wvParseStruct * ps, wvTag tag, void *props, int dirty) +{ + static PAP *ppap; + static_reinit(ppap, NULL) + + expand_data *data = (expand_data *) 
ps->userData; + data->anSttbfAssoc = &ps->anSttbfAssoc; + data->lfo = &ps->lfo; + data->lfolvl = ps->lfolvl; + data->lvl = ps->lvl; + data->nolfo = &ps->nolfo; + data->nooflvl = &ps->nooflvl; + data->stsh = &ps->stsh; + data->lst = &ps->lst; + data->noofLST = &ps->noofLST; + data->liststartnos = &ps->liststartnos; + data->listnfcs = &ps->listnfcs; + data->finallvl = &ps->finallvl; + data->fib = &ps->fib; + data->dop = &ps->dop; + data->intable = &ps->intable; + data->cellbounds = &ps->cellbounds; + data->nocellbounds = &ps->nocellbounds; + data->endcell = &ps->endcell; + data->vmerges = &ps->vmerges; + data->norows = &ps->norows; + data->nextpap = &ps->nextpap; + if (charset == NULL) + { + data->charset = wvAutoCharset (ps); + charset = data->charset; + } + else + data->charset = charset; + data->props = props; + + switch (tag) + { + case PARABEGIN: + { + S16 tilfo = 0; + /* test begin */ + if (*(data->endcell)) + { + tilfo = ((PAP *) (data->props))->ilfo; + ((PAP *) (data->props))->ilfo = 0; + } + /* test end */ + ppap = (PAP *) data->props; + wvTrace ( + ("fore back is %d %d\n", + ((PAP *) (data->props))->shd.icoFore, + ((PAP *) (data->props))->shd.icoBack)); + wvBeginPara (data); + if (tilfo) + ((PAP *) (data->props))->ilfo = tilfo; + } + break; + case PARAEND: + { + S16 tilfo = 0; + /* test begin */ + if (*(data->endcell)) + { + tilfo = ((PAP *) (data->props))->ilfo; + ((PAP *) (data->props))->ilfo = 0; + } + /* test end */ + wvEndCharProp (data); /* danger will break in the future */ + wvEndPara (data); + if (tilfo) + ((PAP *) (data->props))->ilfo = tilfo; + wvCopyPAP (&data->lastpap, (PAP *) (data->props)); + } + break; + case CHARPROPBEGIN: + wvBeginCharProp (data, ppap); + break; + case CHARPROPEND: + wvEndCharProp (data); + break; + case SECTIONBEGIN: + wvBeginSection (data); + break; + case SECTIONEND: + wvEndSection (data); + break; + case COMMENTBEGIN: + wvBeginComment (data); + break; + case COMMENTEND: + wvEndComment (data); + break; + default: + 
break; + } + return (0); +} + +int +mydochandler (wvParseStruct * ps, wvTag tag) +{ + static int i; + static_reinit(i, 0) + expand_data *data = (expand_data *) ps->userData; + data->anSttbfAssoc = &ps->anSttbfAssoc; + data->lfo = &ps->lfo; + data->lfolvl = ps->lfolvl; + data->lvl = ps->lvl; + data->nolfo = &ps->nolfo; + data->nooflvl = &ps->nooflvl; + data->stsh = &ps->stsh; + data->lst = &ps->lst; + data->noofLST = &ps->noofLST; + data->liststartnos = &ps->liststartnos; + data->listnfcs = &ps->listnfcs; + data->finallvl = &ps->finallvl; + data->fib = &ps->fib; + data->dop = &ps->dop; + data->intable = &ps->intable; + data->cellbounds = &ps->cellbounds; + data->nocellbounds = &ps->nocellbounds; + data->endcell = &ps->endcell; + data->vmerges = &ps->vmerges; + data->norows = &ps->norows; + if (i == 0) + { + wvSetEntityConverter (data); + data->filename = ps->filename; + data->whichcell = 0; + data->whichrow = 0; + data->asep = NULL; + i++; + wvInitPAP (&data->lastpap); + data->nextpap = NULL; + data->ps = ps; + } + + if (charset == NULL) + { + data->charset = wvAutoCharset (ps); + charset = data->charset; + } + else + data->charset = charset; + + switch (tag) + { + case DOCBEGIN: + wvBeginDocument (data); + break; + case DOCEND: + wvEndDocument (data); + break; + default: + break; + } + return (0); +} + +void +wvStrangeNoGraphicData (char *config, int graphicstype) +{ + wvError (("Strange No Graphic Data in the 0x01/0x08 graphic\n")); + + if ((strstr (config, "wvLaTeX.xml") != NULL) + || (strstr (config, "wvCleanLaTeX.xml") != NULL)) + printf + ("\n\\resizebox*{\\baselineskip}{!}{\\includegraphics{placeholder.eps}}\ + \n-- %#.2x graphic: StrangeNoGraphicData --", + graphicstype); + else + printf ("\"%#.2x", graphicstype, + "StrangeNoGraphicData", xml_slash, xml_slash); + return; +} + +/* routines for conversion from WMF to EPS or PNG using libwmf(2) library. 
+ */ +int wv_wmfRead (void *); +int wv_wmfSeek (void *, long); +long wv_wmfTell (void *); + +void wvConvert_WMF_to_EPS (int, int, char **); +void wvConvert_WMF_to_PNG (int, int, char **); +void wvConvert_PNG_to_EPS (int, int, char **); +void wvConvert_JPG_to_EPS (int, int, char **); + +int +wv_wmfRead (void *context) +{ + return (fgetc ((FILE *) context)); +} + +int +wv_wmfSeek (void *context, long position) +{ + return (fseek ((FILE *) context, position, SEEK_SET)); +} + +long +wv_wmfTell (void *context) +{ + return (ftell ((FILE *) context)); +} + +#ifdef HAVE_LIBWMF + +#include +#include +#ifdef HAVE_LIBWMF_FOREIGN_H +#include +#endif + +#endif /* HAVE_LIBWMF */ + +void +wvConvert_WMF_to_EPS (int width, int height, char **source) +{ +#ifdef HAVE_LIBWMF + FILE *in = 0; + FILE *out = 0; + + char *sink = 0; + + unsigned long flags; + + wmf_error_t err; + + wmf_eps_t *ddata = 0; + + wmfAPI *API = 0; + + wmfAPI_Options api_options; + + wmfD_Rect bbox; + + in = fopen (*source, "rb"); + + if (in == 0) + return; + + sink = strdup (*source); + + remove_suffix (sink, ".wmf"); + wvAppendStr (&sink, ".eps"); + + out = fopen (sink, "wb"); + + if (out == 0) + { + wvFree (sink); + fclose (in); + return; + } + + flags = WMF_OPT_IGNORE_NONFATAL | WMF_OPT_FUNCTION; + api_options.function = wmf_eps_function; + + err = wmf_api_create (&API, flags, &api_options); + if (err != wmf_E_None) + goto _wmf_error; + + ddata = WMF_EPS_GetData (API); + + err = wmf_bbuf_input (API, wv_wmfRead, wv_wmfSeek, wv_wmfTell, (void *) in); + if (err != wmf_E_None) + goto _wmf_error; + + err = wmf_scan (API, 0, &bbox); + if (err != wmf_E_None) + goto _wmf_error; + + ddata->out = wmf_stream_create (API,out); + if (out == 0) + goto _wmf_error; + + ddata->bbox = bbox; + + ddata->eps_width = width; + ddata->eps_height = height; + + err = wmf_play (API, 0, &bbox); + if (err != wmf_E_None) + goto _wmf_error; + + wmf_api_destroy (API); + + fclose (in); + fclose (out); + + *source = sink; + + return; + + 
_wmf_error: + if (API) + wmf_api_destroy (API); + + fclose (in); + fclose (out); + + wvFree (sink); +#endif /* HAVE_LIBWMF */ +} + +#ifdef HAVE_LIBWMF + +#include +#include + +#endif /* HAVE_LIBWMF */ + +void +wvConvert_WMF_to_PNG (int width, int height, char **source) +{ +#ifdef HAVE_LIBWMF + FILE *in = 0; + FILE *out = 0; + + char *sink = 0; + + unsigned long flags; + + wmf_error_t err; + + wmf_gd_t *ddata = 0; + + wmfAPI *API = 0; + + wmfAPI_Options api_options; + + wmfD_Rect bbox; + + in = fopen (*source, "rb"); + + if (in == 0) + return; + + sink = strdup (*source); + + remove_suffix (sink, ".wmf"); + wvAppendStr (&sink, ".png"); + + out = fopen (sink, "wb"); + + if (out == 0) + { + wvFree (sink); + fclose (in); + return; + } + + flags = WMF_OPT_IGNORE_NONFATAL | WMF_OPT_FUNCTION; + api_options.function = wmf_gd_function; + + err = wmf_api_create (&API, flags, &api_options); + if (err != wmf_E_None) + goto _wmf_error; + + ddata = WMF_GD_GetData (API); + if ((ddata->flags & WMF_GD_SUPPORTS_PNG) == 0) + goto _wmf_error; + + err = wmf_bbuf_input (API, wv_wmfRead, wv_wmfSeek, wv_wmfTell, (void *) in); + if (err != wmf_E_None) + goto _wmf_error; + + err = wmf_scan (API, 0, &bbox); + if (err != wmf_E_None) + goto _wmf_error; + + ddata->type = wmf_gd_png; + + ddata->flags |= WMF_GD_OUTPUT_FILE; + ddata->file = out; + + ddata->bbox = bbox; + + ddata->width = width; + ddata->height = height; + + err = wmf_play (API, 0, &bbox); + if (err != wmf_E_None) + goto _wmf_error; + + wmf_api_destroy (API); + + fclose (in); + fclose (out); + + *source = sink; + + return; + + _wmf_error: + if (API) + wmf_api_destroy (API); + + fclose (in); + fclose (out); + + wvFree (sink); +#endif /* HAVE_LIBWMF */ +} + +void +wvConvert_PNG_to_EPS (int width, int height, char **source) +{ +#ifdef HAVE_LIBWMF_FOREIGN_H + FILE *in = 0; + FILE *out = 0; + + char *sink = 0; + + unsigned long flags; + + wmf_error_t err; + + wmf_foreign_t *ddata = 0; + + wmfAPI *API = 0; + + wmfAPI_Options api_options; 
+ + wmfImage image; + + flags = WMF_OPT_IGNORE_NONFATAL | WMF_OPT_FUNCTION; + api_options.function = wmf_foreign_function; + + err = wmf_api_create (&API, flags, &api_options); + if (err != wmf_E_None) + return; + + ddata = WMF_FOREIGN_GetData (API); + + if ((ddata->flags & WMF_FOREIGN_SUPPORTS_PNG) == 0) + { + wmf_api_destroy (API); + return; + } + + in = fopen (*source, "rb"); + + if (in == 0) + { + wmf_api_destroy (API); + return; + } + + if (wmf_image_load_png (API,in,&image) == (-1)) + { + fclose (in); + wmf_api_destroy (API); + return; + } + + fclose (in); + + sink = strdup (*source); + + remove_suffix (sink, ".png"); + wvAppendStr (&sink, ".eps"); + + out = fopen (sink, "wb"); + + if (out == 0) + { + wvFree (sink); + wmf_image_free (API,&image); + wmf_api_destroy (API); + return; + } + + wmf_image_save_eps (API,out,&image); + + fclose (out); + + wmf_image_free (API,&image); + wmf_api_destroy (API); + + *source = sink; + + return; +#endif /* HAVE_LIBWMF_FOREIGN_H */ +} + +void +wvConvert_JPG_to_EPS (int width, int height, char **source) +{ +#ifdef HAVE_LIBWMF_FOREIGN_H + FILE *in = 0; + FILE *out = 0; + + char *sink = 0; + + unsigned long flags; + + wmf_error_t err; + + wmf_foreign_t *ddata = 0; + + wmfAPI *API = 0; + + wmfAPI_Options api_options; + + wmfImage image; + + flags = WMF_OPT_IGNORE_NONFATAL | WMF_OPT_FUNCTION; + api_options.function = wmf_foreign_function; + + err = wmf_api_create (&API, flags, &api_options); + if (err != wmf_E_None) + return; + + ddata = WMF_FOREIGN_GetData (API); + + if ((ddata->flags & WMF_FOREIGN_SUPPORTS_JPEG) == 0) + { + wmf_api_destroy (API); + return; + } + + in = fopen (*source, "rb"); + + if (in == 0) + { + wmf_api_destroy (API); + return; + } + + if (wmf_image_load_jpg (API,in,&image) == (-1)) + { + fclose (in); + wmf_api_destroy (API); + return; + } + + fclose (in); + + sink = strdup (*source); + + remove_suffix (sink, ".jpg"); + wvAppendStr (&sink, ".eps"); + + out = fopen (sink, "wb"); + + if (out == 0) + { + wvFree 
(sink); + wmf_image_free (API,&image); + wmf_api_destroy (API); + return; + } + + wmf_image_save_eps (API,out,&image); + + fclose (out); + + wmf_image_free (API,&image); + wmf_api_destroy (API); + + *source = sink; + + return; +#endif /* HAVE_LIBWMF_FOREIGN_H */ +} + +static void wv_query_eps (const char* format) +{ +#ifdef HAVE_LIBWMF + unsigned long flags; + + wmf_error_t err; +#ifdef HAVE_LIBWMF_FOREIGN_H + wmf_foreign_t *ddata = 0; +#endif /* HAVE_LIBWMF_FOREIGN_H */ + wmfAPI* API = 0; + wmfAPI_Options api_options; +#endif /* HAVE_LIBWMF */ + + if (format == 0) + { + printf ("no\n"); + return; + } + +#ifdef HAVE_LIBWMF + if (strcmp (format,"wmf") == 0) + { + printf ("yes\n"); + return; + } +#ifdef HAVE_LIBWMF_FOREIGN_H + if (strcmp (format,"png") == 0) + { + flags = WMF_OPT_IGNORE_NONFATAL | WMF_OPT_FUNCTION; + api_options.function = wmf_foreign_function; + + err = wmf_api_create (&API, flags, &api_options); + if (err != wmf_E_None) + { + printf ("no\n"); + return; + } + + ddata = WMF_FOREIGN_GetData (API); + + if (ddata->flags & WMF_FOREIGN_SUPPORTS_PNG) + { + printf ("yes\n"); + } + else + { + printf ("no\n"); + } + + wmf_api_destroy (API); + return; + } + if (strcmp (format,"jpg") == 0) + { + flags = WMF_OPT_IGNORE_NONFATAL | WMF_OPT_FUNCTION; + api_options.function = wmf_foreign_function; + + err = wmf_api_create (&API, flags, &api_options); + if (err != wmf_E_None) + { + printf ("no\n"); + return; + } + + ddata = WMF_FOREIGN_GetData (API); + + if (ddata->flags & WMF_FOREIGN_SUPPORTS_JPEG) + { + printf ("yes\n"); + } + else + { + printf ("no\n"); + } + + wmf_api_destroy (API); + return; + } +#endif /* HAVE_LIBWMF_FOREIGN_H */ +#endif /* HAVE_LIBWMF */ + + printf ("no\n"); + return; +} + +static int Convert_WMF = 1; +static int Convert_EMF = 1; +static int Convert_PNG = 1; +static int Convert_JPG = 1; +static int Convert_PICT = 1; + +static void wv_suppress (const char* format) +{ + const char* ptr = format; + + if (format == 0) + { + Convert_WMF = 1; + 
Convert_EMF = 1; + Convert_PNG = 1; + Convert_JPG = 1; + Convert_PICT = 1; + + return; + } + + while (*ptr) + { + if (strncmp (ptr,"wmf,",4) == 0) + { + Convert_WMF = 0; + ptr += 4; + continue; + } + if (strncmp (ptr,"emf,",4) == 0) + { + Convert_EMF = 0; + ptr += 4; + continue; + } + if (strncmp (ptr,"png,",4) == 0) + { + Convert_PNG = 0; + ptr += 4; + continue; + } + if (strncmp (ptr,"jpg,",4) == 0) + { + Convert_JPG = 0; + ptr += 4; + continue; + } + if (strncmp (ptr,"pict,",5) == 0) + { + Convert_PICT = 0; + ptr += 5; + continue; + } + + if (strcmp (ptr,"wmf") == 0) + { + Convert_WMF = 0; + break; + } + if (strcmp (ptr,"emf") == 0) + { + Convert_EMF = 0; + break; + } + if (strcmp (ptr,"png") == 0) + { + Convert_PNG = 0; + break; + } + if (strcmp (ptr,"jpg") == 0) + { + Convert_JPG = 0; + break; + } + if (strcmp (ptr,"pict") == 0) + { + Convert_PICT = 0; + break; + } + + fprintf (stderr,"format(s) `%s' not recognized!\n",ptr); + break; + } +} + +void +wvPrintGraphics (char *config, int graphicstype, int width, int height, + char *source) +{ + if ((strstr (config, "wvLaTeX.xml") != NULL) + || (strstr (config, "wvCleanLaTeX.xml") != NULL)) + { + if (strlen (source) >= 4) + { + if (Convert_WMF && strcmp (source + strlen (source) - 4, ".wmf") == 0) + wvConvert_WMF_to_EPS (width, height, &source); + else if (Convert_PNG && strcmp (source + strlen (source) - 4, ".png") == 0) + wvConvert_PNG_to_EPS (width, height, &source); + else if (Convert_JPG && strcmp (source + strlen (source) - 4, ".jpg") == 0) + wvConvert_JPG_to_EPS (width, height, &source); + } + remove_suffix (source, ".eps"); + remove_suffix (source, ".wmf"); + remove_suffix (source, ".pict"); + remove_suffix (source, ".png"); + remove_suffix (source, ".jpg"); + /* + Output to real file name. 
Conversion to .eps must be done manually for now + */ + printf ("\n\\resizebox{%dpt}{%dpt}\ + {\\includegraphics{%s.eps}}\ + \n% -- %#.2x graphic -- \n", width, height, source, graphicstype); + } + else + { + if (strlen (source) >= 4) + if (strcmp (source + strlen (source) - 4, ".wmf") == 0) + wvConvert_WMF_to_PNG (width, height, &source); + if ((strstr (config, "wvHtml.xml") != NULL) + || (strstr (config, "wvWml.xml") != NULL)) + { + printf ("\"%#.2x", + width, height, graphicstype, name_to_url (source), + xml_slash, xml_slash); + } + else + { + printf ("\"%#.2x", + width, height, graphicstype, source, + xml_slash, xml_slash); + } + } + return; +} + +int +mySpecCharProc (wvParseStruct * ps, U16 eachchar, CHP * achp) +{ + static int message; + static_reinit(message, 0) + PICF picf; + FSPA *fspa; + expand_data *data = (expand_data *) ps->userData; + + switch (eachchar) + { + case 19: + wvError (("field began\n")); + ps->fieldstate++; + ps->fieldmiddle = 0; + fieldCharProc (ps, eachchar, 0, 0x400); /* temp */ + return (0); + break; + case 20: + wvTrace (("field middle\n")); + if (achp->fOle2) + { + wvError ( + ("this field has an associated embedded object of id %x\n", + achp->fcPic_fcObj_lTagObj)); + /*test = wvFindObject(achp->fcPic_fcObj_lTagObj); + if (test) + wvError(("data can be found in object entry named %s\n",test->name)); + */ } + fieldCharProc (ps, eachchar, 0, 0x400); /* temp */ + ps->fieldmiddle = 1; + return (0); + break; + case 21: + wvTrace (("field end\n")); + ps->fieldstate--; + ps->fieldmiddle = 0; + fieldCharProc (ps, eachchar, 0, 0x400); /* temp */ + return (0); + break; + } + + if (ps->fieldstate) + { + if (fieldCharProc (ps, eachchar, 0, 0x400)) + return (0); + } + + switch (eachchar) + { + case 0x05: + /* this should be handled by the COMMENTBEGIN and COMMENTEND events */ + return (0); + break; + case 0x01: + { + wvStream *f; + Blip blip; + char *name; + long p = wvStream_tell (ps->data); + wvError ( + ("picture 0x01 here, at offset %x in 
Data Stream, obj is %d, ole is %d\n", + achp->fcPic_fcObj_lTagObj, achp->fObj, achp->fOle2)); + + if (achp->fOle2) + return (0); + if(!no_graphics) + { + wvStream_goto (ps->data, achp->fcPic_fcObj_lTagObj); + wvGetPICF (wvQuerySupported (&ps->fib, NULL), &picf, ps->data); + f = picf.rgb; + if (wv0x01 (&blip, f, picf.lcb - picf.cbHeader)) + { + wvTrace (("Here\n")); + name = wvHtmlGraphic (ps, &blip); + if (ps->dir) chdir (ps->dir); + wvPrintGraphics (config, 0x01, + (int) wvTwipsToHPixels (picf.dxaGoal), + (int) wvTwipsToVPixels (picf.dyaGoal), + name); + if (ps->dir) chdir (wv_cwd); + wvFree (name); + } + else + wvStrangeNoGraphicData (config, 0x01); + } + + wvStream_goto (ps->data, p); + return (0); + } + case 0x08: + { + Blip blip; + char *name; + if (wvQuerySupported (&ps->fib, NULL) == WORD8) + { + if(!no_graphics) + { + if (ps->nooffspa > 0) + { + fspa = + wvGetFSPAFromCP (ps->currentcp, ps->fspa, + ps->fspapos, ps->nooffspa); + + if (!fspa) + { + wvError (("No fspa! Insanity abounds!\n")); + return 0; + } + + data->props = fspa; + if (wv0x08 (&blip, fspa->spid, ps)) + { + wvTrace (("Here\n")); + name = wvHtmlGraphic (ps, &blip); + if (ps->dir) chdir (ps->dir); + wvPrintGraphics (config, 0x08, + (int) + wvTwipsToHPixels (fspa->xaRight + - + fspa-> + xaLeft), + (int) wvTwipsToVPixels (fspa-> + yaBottom + - + fspa-> + yaTop), + name); + if (ps->dir) chdir (wv_cwd); + wvFree (name); + } + else + wvStrangeNoGraphicData (config, 0x08); + } + else + { + wvError (("nooffspa was <=0! 
Ignoring.\n")); + } + } + } + else + { + FDOA *fdoa; + wvError ( + ("pre word8 0x08 graphic, unsupported at the moment\n")); + fdoa = + wvGetFDOAFromCP (ps->currentcp, ps->fdoa, ps->fdoapos, + ps->nooffdoa); + data->props = fdoa; + } + + + + + +#if 0 + if ((fspa) && (data->sd != NULL) + && (data->sd->elements[TT_PICTURE].str) + && (data->sd->elements[TT_PICTURE].str[0] != NULL)) + { + wvExpand (data, data->sd->elements[TT_PICTURE].str[0], + strlen (data->sd->elements[TT_PICTURE].str[0])); + if (data->retstring) + { + wvTrace ( + ("picture string is now %s", + data->retstring)); + printf ("%s", data->retstring); + wvFree (data->retstring); + } + } +#endif + return (0); + } + case 0x28: + { + U16 symbol[6] = { 'S', 'y', 'm', 'b', 'o', 'l' }; + U16 wingdings[9] = + { 'W', 'i', 'n', 'g', 'd', 'i', 'n', 'g', 's' }; + U16 mtextra[8] = + { 'M', 'T', ' ', 'E', 'x', 't', 'r', 'a' }; + + wvTrace ( + ("no of strings %d %d\n", ps->fonts.nostrings, + achp->ftcSym)); + if (0 == memcmp (symbol, ps->fonts.ffn[achp->ftcSym].xszFfn, 12)) + { + if ((!message) && (strcasecmp ("UTF-8", charset))) + { + wvWarning + ("Symbol font detected (too late sorry!), rerun wvHtml with option --charset utf-8\n\ +option to support correct symbol font conversion to a viewable format.\n"); + message++; + } + wvTrace ( + ("symbol char %d %x %c, using font %d %s\n", + achp->xchSym, achp->xchSym, achp->xchSym, + achp->ftcSym, + wvWideStrToMB (ps->fonts.ffn[achp->ftcSym]. 
+ xszFfn))); + wvTrace ( + ("symbol char ends up as a unicode %x\n", + wvConvertSymbolToUnicode (achp->xchSym - 61440))); + wvOutputFromUnicode (wvConvertSymbolToUnicode + (achp->xchSym - 61440), charset); + return (0); + } + else if (0 == + memcmp (mtextra, ps->fonts.ffn[achp->ftcSym].xszFfn, + 16)) + { + if ((!message) && (strcasecmp ("UTF-8", charset))) + { + wvWarning + ("MT Extra font detected (too late sorry!), rerun wvHtml with option --charset utf-8\n\ +option to support correct symbol font conversion to a viewable format.\n"); + message++; + } + wvTrace ( + ("Symbol char %d %x %c, using font %d %s\n", + achp->xchSym, achp->xchSym, achp->xchSym, + achp->ftcSym, + wvWideStrToMB (ps->fonts.ffn[achp->ftcSym]. + xszFfn))); + wvTrace ( + ("symbol char ends up as a unicode %x\n", + wvConvertMTExtraToUnicode (achp->xchSym - 61440))); + wvOutputFromUnicode (wvConvertMTExtraToUnicode + (achp->xchSym - 61440), charset); + return (0); + } + else if (0 == + memcmp (wingdings, ps->fonts.ffn[achp->ftcSym].xszFfn, + 18)) + { + if (!message) + { + wvError ( + ("I have yet to do a wingdings to unicode mapping table, if you know of one tell me\n")); + message++; + } + } + else + { + if (!message) + { + char *fontname = + wvWideStrToMB (ps->fonts.ffn[achp->ftcSym]. 
+ xszFfn); + wvError ( + ("Special font %s, i need a mapping table to unicode for this\n", + fontname)); + wvFree (fontname); + printf ("*"); + } + return (0); + } + } + default: + return (0); + } + + + + return (0); +} + + +int +myCharProc (wvParseStruct * ps, U16 eachchar, U8 chartype, U16 lid) +{ + switch (eachchar) + { + case 19: + wvTrace (("field began\n")); + ps->fieldstate++; + ps->fieldmiddle = 0; + fieldCharProc (ps, eachchar, chartype, lid); /* temp */ + return (0); + break; + case 20: + wvTrace (("field middle\n")); + fieldCharProc (ps, eachchar, chartype, lid); + ps->fieldmiddle = 1; + return (0); + break; + case 21: + wvTrace (("field began\n")); + ps->fieldmiddle = 0; + ps->fieldstate--; + fieldCharProc (ps, eachchar, chartype, lid); /* temp */ + return (0); + break; + case 0x08: + wvError ( + ("hmm did we loose the fSpec flag ?, this is possibly a bug\n")); + break; + } + + if (ps->fieldstate) + { + if (fieldCharProc (ps, eachchar, chartype, lid)) + return (0); + } + + wvTrace ( + ("charset is %s, lid is %x, type is %d, char is %x\n", charset, + lid, chartype, eachchar)); + + if ((chartype) && (wvQuerySupported (&ps->fib, NULL) == WORD8)) + wvTrace (("lid is %x\n", lid)); + + if (charset != NULL) + wvOutputHtmlChar (eachchar, chartype, charset, lid); + else + wvOutputHtmlChar (eachchar, chartype, wvAutoCharset (ps), lid); + return (0); +} + +int +wvOpenConfig (state_data *myhandle,char *config) +{ + FILE *tmp; + int i = 0; + if (config == NULL) + config = "wvHtml.xml"; + else + i = 1; + tmp = fopen (config, "rb"); + + if(tmp == NULL) + { + static char * buf = NULL; + if (NULL != buf) { + free(buf); + } + buf = strdup_and_append_twice(s_WVDATADIR, "/", config); + config = buf; + tmp = fopen(config, "rb"); + } + + if (tmp == NULL) + { + if (i) + wvError ( + ("Attempt to open %s failed, using %s\n", config, + s_HTMLCONFIG)); + config = s_HTMLCONFIG; + tmp = fopen (config, "rb"); + } + myhandle->path = config; + myhandle->fp = tmp; + return (tmp == NULL 
? 0 : 1); +} + +char * figure_name (wvParseStruct * ps) +{ + static int number; + static_reinit(number, 0) + static char * b_name = 0; + static_reinit(b_name, NULL) + char * f_name = 0; + char buffer[10]; + + if (b_name == 0) + { + if (wv_arg_basename) + { + b_name = strdup (wv_arg_basename); +#ifdef WV_REMOVE_SUFFIX + if (b_name) /* remove any suffix */ + { + char * dot = 0; + char * ptr = b_name; + while (*ptr) + { + if (*ptr == '.') dot = ptr; + ptr++; + } + if (dot) *dot = 0; + } +#endif /* WV_REMOVE_SUFFIX */ + } + else + { + b_name = strdup (base_name (ps->filename)); + if (b_name) /* remove '.doc' suffix; case insensitive */ + { + if (strlen (b_name) >= 4) + { + char * dot = b_name + strlen (b_name) - 4; + if (strcasecmp (dot,".doc") == 0) *dot = 0; + } + } + } + } + + if (b_name == 0) + { + fprintf (stderr,"error: unable to create basename!"); + exit (1); + } + + f_name = strdup (b_name); + if (f_name) + { + sprintf (buffer, "%d", number++); + wvAppendStr (&f_name, buffer); + } + else + { + fprintf (stderr,"error: unable to create filename!"); + exit (1); + } + + return (f_name); +} + +char * name_to_url (char * name) +{ + static char * url = 0; + static_reinit(url, NULL) + static long max = 0; + static_reinit(max, 0) + char * ptr = 0; + long count = 0; + + ptr = name; + while (*ptr) + { + switch (*ptr) + { + case ' ': + count += 3; + break; + default: + count++; + break; + } + ptr++; + } + count++; + + if (count > max) + { + char * more = 0; + if (url == 0) + { + more = malloc (count); + } + else + { + more = realloc (url,count); + } + if (more) + { + url = more; + max = count; + } + } + + if (url) + { + count = 0; + ptr = name; + while (*ptr && (count < max)) + { + switch (*ptr) + { + case ' ': + url[count++] = '%'; + if (count < max) url[count++] = '2'; + if (count < max) url[count++] = '0'; + break; + default: + url[count++] = *ptr; + break; + } + ptr++; + } + url[max-1] = 0; + } + else + { + wvError (("failed to convert name to URL\n")); + return 
(name); + } + + return (url); +} From 05af0daac33dd43360ee74876d3d1b7acbdb769c Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 22 Aug 2024 00:14:00 +0300 Subject: [PATCH 25/97] inline errorPrinter --- src/wvWare.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/wvWare.c b/src/wvWare.c index a13d13b5..46128e3c 100644 --- a/src/wvWare.c +++ b/src/wvWare.c @@ -28,7 +28,27 @@ #include #include #include "getopt.h" -#include "errorPrinter.h" + +#ifdef __ANDROID_API__ +#include +#define ParenthesesStripper(...) __VA_ARGS__ + +#ifdef wvError +#undef wvError +#endif +#define wvError( args ) __android_log_print(ANDROID_LOG_ERROR, "wv", ParenthesesStripper args); + +#ifdef wvWarning +#undef wvWarning +#endif +#define wvWarning( args ) __android_log_print(ANDROID_LOG_WARN, "wv", args); + +#ifdef wvTrace +#undef wvTrace +#endif +//#define wvTrace( args ) __android_log_print(ANDROID_LOG_VERBOSE, "wv", ParenthesesStripper args); +#define wvTrace( args ) +#endif /* strdup isn't declared in for `gcc -ansi'; declare it here */ extern char *strdup (const char *); From bac6e121563617f6338150647d244c7d2c782492 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 22 Aug 2024 01:19:00 +0300 Subject: [PATCH 26/97] cmakedefine WITH_PDF2HTMLEX and WITH_WVWARE in project_info.hpp --- CMakeLists.txt | 4 ++-- src/odr/internal/{project_info.hpp => project_info.hpp.in} | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) rename src/odr/internal/{project_info.hpp => project_info.hpp.in} (79%) diff --git a/CMakeLists.txt b/CMakeLists.txt index ba686b02..317f0950 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -40,6 +40,7 @@ find_package(uchardet REQUIRED) find_package(utf8cpp REQUIRED) configure_file("src/odr/internal/project_info.cpp.in" "src/odr/internal/project_info.cpp") +configure_file("src/odr/internal/project_info.hpp.in" "src/odr/internal/project_info.hpp") set(PRE_CONFIGURE_FILE "src/odr/internal/git_info.cpp.in") 
set(POST_CONFIGURE_FILE "${CMAKE_CURRENT_BINARY_DIR}/src/odr/internal/git_info.cpp") @@ -186,6 +187,7 @@ if(WITH_WVWARE) endif(WITH_WVWARE) add_library(odr ${ODR_SOURCE_FILES}) +target_include_directories(odr PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/src) set_target_properties(odr PROPERTIES OUTPUT_NAME odr) target_include_directories(odr PUBLIC @@ -203,12 +205,10 @@ target_link_libraries(odr ) if(WITH_PDF2HTMLEX) - target_compile_definitions(odr PRIVATE "WITH_PDF2HTMLEX=1") find_package(pdf2htmlEX REQUIRED) target_link_libraries(odr PRIVATE pdf2htmlex::pdf2htmlex) endif(WITH_PDF2HTMLEX) if(WITH_WVWARE) - target_compile_definitions(odr PRIVATE "WITH_WVWARE=1") find_package(wvware REQUIRED) target_link_libraries(odr PRIVATE wvware::wvware) endif(WITH_WVWARE) diff --git a/src/odr/internal/project_info.hpp b/src/odr/internal/project_info.hpp.in similarity index 79% rename from src/odr/internal/project_info.hpp rename to src/odr/internal/project_info.hpp.in index 2c7cfd66..d93c9d88 100644 --- a/src/odr/internal/project_info.hpp +++ b/src/odr/internal/project_info.hpp.in @@ -5,4 +5,7 @@ namespace odr::internal::project_info { const char *version() noexcept; } // namespace odr::internal::project_info +#cmakedefine WITH_PDF2HTMLEX 1 +#cmakedefine WITH_WVWARE 1 + #endif // ODR_INTERNAL_PROJECT_INFO_HPP From 9f3ee87b0528144b63b3bbb0bbfa14ddeb29eecf Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 22 Aug 2024 01:21:11 +0300 Subject: [PATCH 27/97] Introduce Html OpenDocumentReader::pdf2htmlEX() instead of using html::translate --- src/odr/html.cpp | 8 -------- src/odr/open_document_reader.cpp | 12 +++++++++++- src/odr/open_document_reader.hpp | 13 +++++++++++++ 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/odr/html.cpp b/src/odr/html.cpp index e9be87fe..9a7fd62b 100644 --- a/src/odr/html.cpp +++ b/src/odr/html.cpp @@ -9,11 +9,7 @@ #include #include #include -#if defined(WITH_PDF2HTMLEX) -#include -#else #include -#endif #include #include @@ -117,11 
+113,7 @@ Html html::translate(const Document &document, const std::string &output_path, Html html::translate(const PdfFile &pdf_file, const std::string &output_path, const HtmlConfig &config) { fs::create_directories(output_path); -#if defined(WITH_PDF2HTMLEX) - return internal::html::pdf2htmlEX_wrapper(pdf_file, output_path, config); -#else return internal::html::translate_pdf_file(pdf_file, output_path, config); -#endif } void html::edit(const Document &document, const char *diff) { diff --git a/src/odr/open_document_reader.cpp b/src/odr/open_document_reader.cpp index 04d8d865..8ad6dd09 100644 --- a/src/odr/open_document_reader.cpp +++ b/src/odr/open_document_reader.cpp @@ -7,11 +7,13 @@ #include #include #include -#include #include #include +#if defined(WITH_PDF2HTMLEX) +#include +#endif namespace odr { std::string OpenDocumentReader::version() noexcept { @@ -245,6 +247,14 @@ Html OpenDocumentReader::html(const PdfFile &pdf_file, return html::translate(pdf_file, output_path, config); } +#if defined(WITH_PDF2HTMLEX) +Html OpenDocumentReader::pdf2htmlEX(const PdfFile &pdf_file, + const std::string &output_path, + const HtmlConfig &config) { + return internal::html::pdf2htmlEX_wrapper(pdf_file, output_path, config); +} +#endif + void OpenDocumentReader::edit(const Document &document, const char *diff) { html::edit(document, diff); } diff --git a/src/odr/open_document_reader.hpp b/src/odr/open_document_reader.hpp index e73c993e..c46e994c 100644 --- a/src/odr/open_document_reader.hpp +++ b/src/odr/open_document_reader.hpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace odr { enum class FileType; @@ -139,6 +140,18 @@ class OpenDocumentReader final { const std::string &output_path, const HtmlConfig &config); +#if defined(WITH_PDF2HTMLEX) + /// @brief Translates a PDF file to HTML using pdf2htmlEX. + /// + /// @param pdf_file PDF file to translate. + /// @param output_path Path to save the HTML output. 
+ /// @param config Configuration for the HTML output. + /// @return HTML output. + [[nodiscard]] static Html pdf2htmlEX(const PdfFile &pdf_file, + const std::string &output_path, + const HtmlConfig &config); +#endif + /// @brief Edit a document. /// @param document The document. /// @param diff The diff. From 0578221a6aa69300d63912baf35637138328df99 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 22 Aug 2024 02:05:57 +0300 Subject: [PATCH 28/97] Add and expose wvWare_wrapper --- CMakeLists.txt | 1 + src/odr/internal/html/wvWare_wrapper.cpp | 58 ++++++++++++++++++++++++ src/odr/internal/html/wvWare_wrapper.hpp | 20 ++++++++ src/odr/open_document_reader.cpp | 12 +++++ src/odr/open_document_reader.hpp | 12 +++++ 5 files changed, 103 insertions(+) create mode 100644 src/odr/internal/html/wvWare_wrapper.cpp create mode 100644 src/odr/internal/html/wvWare_wrapper.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 317f0950..aa8e2095 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -182,6 +182,7 @@ if(WITH_PDF2HTMLEX) endif(WITH_PDF2HTMLEX) if(WITH_WVWARE) LIST(APPEND ODR_SOURCE_FILES + "src/odr/internal/html/wvWare_wrapper.cpp" "src/wvWare.c" ) endif(WITH_WVWARE) diff --git a/src/odr/internal/html/wvWare_wrapper.cpp b/src/odr/internal/html/wvWare_wrapper.cpp new file mode 100644 index 00000000..2c682ed2 --- /dev/null +++ b/src/odr/internal/html/wvWare_wrapper.cpp @@ -0,0 +1,58 @@ +#include + +#include +#include +#include + +#include + +#include + +extern "C" { + int convert(char *inputFile, char *outputDir, const char *password); + int no_graphics; +} + +namespace odr::internal { + +Html wvWare_wrapper(const File &file, const std::string &output_path, + const HtmlConfig &config) { + auto disk_path = file.disk_path(); + if (!disk_path.has_value()) { + throw FileNotFound(); + } + + auto output_file_path = output_path + "/document.html"; + + char *input_file_path = strdup(disk_path->c_str()); + char *output_dir = strdup(output_path.c_str()); + + 
no_graphics = 1; + + g_htmlOutputFileHandle = fopen(output_file_path.c_str(), "w"); + + std::string password; + int retVal = convert(input_file_path, output_dir, password.c_str()); + free(output_dir); + free(input_file_path); + fclose(g_htmlOutputFileHandle); + g_htmlOutputFileHandle = nullptr; + + if (0 != retVal) { + unlink(output_file_path.c_str()); + + switch (retVal) { + case 100: // PasswordRequired + case 101: // Wrong Password + throw WrongPassword(); + default: + throw std::runtime_error("Conversion error"); + } + } + + return {FileType::legacy_word_document, + config, + {{"document", output_file_path}}}; +} + +} // namespace odr::internal diff --git a/src/odr/internal/html/wvWare_wrapper.hpp b/src/odr/internal/html/wvWare_wrapper.hpp new file mode 100644 index 00000000..28775f1f --- /dev/null +++ b/src/odr/internal/html/wvWare_wrapper.hpp @@ -0,0 +1,20 @@ +#ifndef ODR_INTERNAL_WVWARE_WRAPPER_HPP +#define ODR_INTERNAL_WVWARE_WRAPPER_HPP + +#include + +namespace odr { +class File; + +struct HtmlConfig; +class Html; +} // namespace odr + +namespace odr::internal::html { + +Html wvWare_wrapper(const File &file, const std::string &output_path, + const HtmlConfig &config); + +} + +#endif // ODR_INTERNAL_WVWARE_WRAPPER_HPP diff --git a/src/odr/open_document_reader.cpp b/src/odr/open_document_reader.cpp index 8ad6dd09..2ceb30c9 100644 --- a/src/odr/open_document_reader.cpp +++ b/src/odr/open_document_reader.cpp @@ -14,6 +14,10 @@ #if defined(WITH_PDF2HTMLEX) #include #endif +#if defined(WITH_WVWARE) +#include +#endif + namespace odr { std::string OpenDocumentReader::version() noexcept { @@ -255,6 +259,14 @@ Html OpenDocumentReader::pdf2htmlEX(const PdfFile &pdf_file, } #endif +#if defined(WITH_WVWARE) +Html OpenDocumentReader::wvHtml(const File &file, + const std::string &output_path, + const HtmlConfig &config) { + return internal::html::wvWare_wrapper(file, output_path, config); +} +#endif + void OpenDocumentReader::edit(const Document &document, const char 
*diff) { html::edit(document, diff); } diff --git a/src/odr/open_document_reader.hpp b/src/odr/open_document_reader.hpp index c46e994c..ca615971 100644 --- a/src/odr/open_document_reader.hpp +++ b/src/odr/open_document_reader.hpp @@ -152,6 +152,18 @@ class OpenDocumentReader final { const HtmlConfig &config); #endif +#if defined(WITH_WVWARE) + /// @brief Translates a file to HTML. + /// + /// @param file File to translate. + /// @param output_path Path to save the HTML output. + /// @param config Configuration for the HTML output. + /// @return HTML output. + [[nodiscard]] static Html wvHtml(const File &file, + const std::string &output_path, + const HtmlConfig &config); +#endif + /// @brief Edit a document. /// @param document The document. /// @param diff The diff. From 3728064958e73b419a630a92b6a29f0b90574b21 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 22 Aug 2024 02:11:18 +0300 Subject: [PATCH 29/97] Formatting --- src/odr/internal/html/wvWare_wrapper.cpp | 9 ++++----- src/odr/open_document_reader.cpp | 4 ++-- src/odr/open_document_reader.hpp | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/odr/internal/html/wvWare_wrapper.cpp b/src/odr/internal/html/wvWare_wrapper.cpp index 2c682ed2..dfe35795 100644 --- a/src/odr/internal/html/wvWare_wrapper.cpp +++ b/src/odr/internal/html/wvWare_wrapper.cpp @@ -9,8 +9,8 @@ #include extern "C" { - int convert(char *inputFile, char *outputDir, const char *password); - int no_graphics; +int convert(char *inputFile, char *outputDir, const char *password); +int no_graphics; } namespace odr::internal { @@ -50,9 +50,8 @@ Html wvWare_wrapper(const File &file, const std::string &output_path, } } - return {FileType::legacy_word_document, - config, - {{"document", output_file_path}}}; + return { + FileType::legacy_word_document, config, {{"document", output_file_path}}}; } } // namespace odr::internal diff --git a/src/odr/open_document_reader.cpp b/src/odr/open_document_reader.cpp index 
2ceb30c9..0338fd3a 100644 --- a/src/odr/open_document_reader.cpp +++ b/src/odr/open_document_reader.cpp @@ -253,8 +253,8 @@ Html OpenDocumentReader::html(const PdfFile &pdf_file, #if defined(WITH_PDF2HTMLEX) Html OpenDocumentReader::pdf2htmlEX(const PdfFile &pdf_file, - const std::string &output_path, - const HtmlConfig &config) { + const std::string &output_path, + const HtmlConfig &config) { return internal::html::pdf2htmlEX_wrapper(pdf_file, output_path, config); } #endif diff --git a/src/odr/open_document_reader.hpp b/src/odr/open_document_reader.hpp index ca615971..78bcd0cd 100644 --- a/src/odr/open_document_reader.hpp +++ b/src/odr/open_document_reader.hpp @@ -2,9 +2,9 @@ #define ODR_OPEN_DOCUMENT_READER_HPP #include +#include #include #include -#include namespace odr { enum class FileType; From cba968e2cb2c274eb67667c57ec70a6a27421ce7 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 22 Aug 2024 02:16:35 +0300 Subject: [PATCH 30/97] #include unistd.h in wvWare_wrapper.cpp to find unlink() --- src/odr/internal/html/wvWare_wrapper.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/odr/internal/html/wvWare_wrapper.cpp b/src/odr/internal/html/wvWare_wrapper.cpp index dfe35795..74292ba8 100644 --- a/src/odr/internal/html/wvWare_wrapper.cpp +++ b/src/odr/internal/html/wvWare_wrapper.cpp @@ -1,12 +1,10 @@ -#include - #include #include #include - #include - +#include #include +#include extern "C" { int convert(char *inputFile, char *outputDir, const char *password); From e5b1f72c084914d21486c92c3ab6b1a0e2ea1707 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 22 Aug 2024 02:18:48 +0300 Subject: [PATCH 31/97] Formatting --- src/odr/html.hpp | 7 +++---- src/odr/internal/html/wvWare_wrapper.cpp | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/odr/html.hpp b/src/odr/html.hpp index ecf9199c..acefa580 100644 --- a/src/odr/html.hpp +++ b/src/odr/html.hpp @@ -113,10 +113,9 @@ namespace html { /// 
@param config Configuration for the HTML output. /// @param password_callback Callback to get the password for encrypted files. /// @return HTML output. -[[deprecated]] -Html translate(const File &file, const std::string &output_path, - const HtmlConfig &config, - const PasswordCallback &password_callback); +[[deprecated]] Html translate(const File &file, const std::string &output_path, + const HtmlConfig &config, + const PasswordCallback &password_callback); /// @brief Translates a decoded file to HTML. /// /// @param file Decoded file to translate. diff --git a/src/odr/internal/html/wvWare_wrapper.cpp b/src/odr/internal/html/wvWare_wrapper.cpp index 74292ba8..46618890 100644 --- a/src/odr/internal/html/wvWare_wrapper.cpp +++ b/src/odr/internal/html/wvWare_wrapper.cpp @@ -3,8 +3,8 @@ #include #include #include -#include #include +#include extern "C" { int convert(char *inputFile, char *outputDir, const char *password); From 35298f9c79cab7ca14ccfd95f07a1ed6b0461451 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 22 Aug 2024 02:25:58 +0300 Subject: [PATCH 32/97] Don't build dependencies in CI, they should be built already. Error out otherwise --- .github/workflows/build_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index ff10c55b..671cfbbb 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -50,7 +50,7 @@ jobs: - name: conan config run: conan config install .github/config/${{ matrix.os }}-${{ matrix.compiler }}/conan - name: conan install - run: conan install . --output-folder=build --build=missing + run: conan install . --output-folder=build --build=never - name: cache uses: actions/cache@v4 @@ -283,7 +283,7 @@ jobs: run: conan export . --name odrcore --version 0.0.0 - name: conan install - run: conan install . --output-folder=build --build=missing + run: conan install . 
--output-folder=build --build=never # odrcore/0.0.0 will be missing, can't build=never - name: conan downstream From 4299c5d1d7781db6370954eea16816ba124ba1c4 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 22 Aug 2024 03:11:07 +0300 Subject: [PATCH 33/97] Revert accidentally changed formatting in html.cpp --- src/odr/html.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/odr/html.hpp b/src/odr/html.hpp index acefa580..ecf9199c 100644 --- a/src/odr/html.hpp +++ b/src/odr/html.hpp @@ -113,9 +113,10 @@ namespace html { /// @param config Configuration for the HTML output. /// @param password_callback Callback to get the password for encrypted files. /// @return HTML output. -[[deprecated]] Html translate(const File &file, const std::string &output_path, - const HtmlConfig &config, - const PasswordCallback &password_callback); +[[deprecated]] +Html translate(const File &file, const std::string &output_path, + const HtmlConfig &config, + const PasswordCallback &password_callback); /// @brief Translates a decoded file to HTML. /// /// @param file Decoded file to translate. 
From 1596670f908e569207888868c7fec3354353b9f4 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 22 Aug 2024 03:24:30 +0300 Subject: [PATCH 34/97] Update wvWare wrapper --- CMakeLists.txt | 3 ++- src/odr/internal/html/wvWare_wrapper.cpp | 15 ++++++++++++--- src/wvWare.c | 21 ++++++++++++--------- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aa8e2095..14079bf2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -211,7 +211,8 @@ if(WITH_PDF2HTMLEX) endif(WITH_PDF2HTMLEX) if(WITH_WVWARE) find_package(wvware REQUIRED) - target_link_libraries(odr PRIVATE wvware::wvware) + # target "meta" errors out if wvware is linked privately + target_link_libraries(odr PUBLIC wvware::wvware) endif(WITH_WVWARE) if (EXISTS "${PROJECT_SOURCE_DIR}/.git") diff --git a/src/odr/internal/html/wvWare_wrapper.cpp b/src/odr/internal/html/wvWare_wrapper.cpp index 46618890..deb3e4f2 100644 --- a/src/odr/internal/html/wvWare_wrapper.cpp +++ b/src/odr/internal/html/wvWare_wrapper.cpp @@ -8,7 +8,11 @@ extern "C" { int convert(char *inputFile, char *outputDir, const char *password); -int no_graphics; +int no_graphics = 1; +int documentId = 0; + +char *s_WVDATADIR = NULL; +char *s_HTMLCONFIG = NULL; } namespace odr::internal { @@ -20,15 +24,20 @@ Html wvWare_wrapper(const File &file, const std::string &output_path, throw FileNotFound(); } + // @TODO: getenv() +// s_WVDATADIR = NULL; +// s_HTMLCONFIG = NULL; + auto output_file_path = output_path + "/document.html"; char *input_file_path = strdup(disk_path->c_str()); char *output_dir = strdup(output_path.c_str()); - no_graphics = 1; - g_htmlOutputFileHandle = fopen(output_file_path.c_str(), "w"); + documentId++; + + // @TODO: password std::string password; int retVal = convert(input_file_path, output_dir, password.c_str()); free(output_dir); diff --git a/src/wvWare.c b/src/wvWare.c index 46128e3c..440e656c 100644 --- a/src/wvWare.c +++ b/src/wvWare.c @@ -17,9 +17,7 @@ * 02111-1307, USA. 
*/ -//#ifdef HAVE_CONFIG_H #include -//#endif #include #include @@ -50,12 +48,6 @@ #define wvTrace( args ) #endif -/* strdup isn't declared in for `gcc -ansi'; declare it here */ -extern char *strdup (const char *); - -extern char *str_copy(char *d, size_t n, char *s); -extern char *str_append(char *d, size_t n, char *s); - extern char *s_WVDATADIR; extern char *s_HTMLCONFIG; extern int documentId; @@ -67,7 +59,18 @@ extern int documentId; } \ } -extern char * strdup_and_append_twice(const char * a, const char * b, const char * c); +char *strdup_and_append_twice(const char *a, const char *b, const char *c) { + const size_t szA = strlen(a); + const size_t szB = strlen(b); + const size_t szC = strlen(c); + + char *buf = malloc(szA + szB + szC + 1); + strcpy(buf, a); + strcpy(buf + szA, b); + strcpy(buf + szA + szB, c); + buf[szA + szB + szC] = '\0'; + return buf; +} /* Released under GPL, written by Caolan.McNamara@ul.ie. From 3e4f52b2a2146c4e9c2848228a8f2c25947b595e Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 22 Aug 2024 03:27:11 +0300 Subject: [PATCH 35/97] clang format wvWare_wrapper.cpp --- src/odr/internal/html/wvWare_wrapper.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/odr/internal/html/wvWare_wrapper.cpp b/src/odr/internal/html/wvWare_wrapper.cpp index deb3e4f2..24441535 100644 --- a/src/odr/internal/html/wvWare_wrapper.cpp +++ b/src/odr/internal/html/wvWare_wrapper.cpp @@ -25,8 +25,8 @@ Html wvWare_wrapper(const File &file, const std::string &output_path, } // @TODO: getenv() -// s_WVDATADIR = NULL; -// s_HTMLCONFIG = NULL; + // s_WVDATADIR = NULL; + // s_HTMLCONFIG = NULL; auto output_file_path = output_path + "/document.html"; From 2e164bea9e6a9a412ac3454d3f0493aed3bfb5cb Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 22 Aug 2024 03:45:13 +0300 Subject: [PATCH 36/97] Update wvWare wrapper --- CMakeLists.txt | 3 +-- src/odr/internal/html/wvWare_wrapper.cpp | 13 ++++++------- src/wvWare.c | 7 
++++--- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 14079bf2..aa8e2095 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -211,8 +211,7 @@ if(WITH_PDF2HTMLEX) endif(WITH_PDF2HTMLEX) if(WITH_WVWARE) find_package(wvware REQUIRED) - # target "meta" errors out if wvware is linked privately - target_link_libraries(odr PUBLIC wvware::wvware) + target_link_libraries(odr PRIVATE wvware::wvware) endif(WITH_WVWARE) if (EXISTS "${PROJECT_SOURCE_DIR}/.git") diff --git a/src/odr/internal/html/wvWare_wrapper.cpp b/src/odr/internal/html/wvWare_wrapper.cpp index 24441535..c1f72059 100644 --- a/src/odr/internal/html/wvWare_wrapper.cpp +++ b/src/odr/internal/html/wvWare_wrapper.cpp @@ -8,14 +8,13 @@ extern "C" { int convert(char *inputFile, char *outputDir, const char *password); -int no_graphics = 1; -int documentId = 0; - -char *s_WVDATADIR = NULL; -char *s_HTMLCONFIG = NULL; +extern int no_graphics; +extern int documentId; +extern char *s_WVDATADIR; +extern char *s_HTMLCONFIG; } -namespace odr::internal { +namespace odr::internal::html { Html wvWare_wrapper(const File &file, const std::string &output_path, const HtmlConfig &config) { @@ -61,4 +60,4 @@ Html wvWare_wrapper(const File &file, const std::string &output_path, FileType::legacy_word_document, config, {{"document", output_file_path}}}; } -} // namespace odr::internal +} // namespace odr::internal::html diff --git a/src/wvWare.c b/src/wvWare.c index 440e656c..556d9c83 100644 --- a/src/wvWare.c +++ b/src/wvWare.c @@ -48,9 +48,10 @@ #define wvTrace( args ) #endif -extern char *s_WVDATADIR; -extern char *s_HTMLCONFIG; -extern int documentId; +char *s_WVDATADIR = NULL; +char *s_HTMLCONFIG = NULL; +int documentId = 0; + #define static_reinit( variable, defaultValue ) { \ static int staticVarValue = 0; \ if (staticVarValue != documentId) { \ From 7dd61fc30147d158f55e696e2412fa2e7c01b945 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 22 Aug 2024 15:09:22 
+0300 Subject: [PATCH 37/97] Install .hpp files from ${CMAKE_CURRENT_BINARY_DIR}/src/ too (generated project_info.hpp) --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aa8e2095..03e80af1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -231,7 +231,7 @@ if (ODR_CLANG_TIDY) endif () install( - DIRECTORY src/ + DIRECTORY src/ ${CMAKE_CURRENT_BINARY_DIR}/src/ DESTINATION include/ FILES_MATCHING PATTERN "*.hpp" ) From 05cb08632f6a583de2ee1fa5d4539e3886bfa0a2 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 25 Aug 2024 17:27:30 +0300 Subject: [PATCH 38/97] Update WVDATADIR setting --- CMakeLists.txt | 2 ++ conanfile.py | 2 ++ src/odr/internal/project_info.hpp.in | 2 +- src/wvWare.c | 21 +++++++++++++++------ src/wvWare.h.in | 7 +++++++ 5 files changed, 27 insertions(+), 7 deletions(-) create mode 100644 src/wvWare.h.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 03e80af1..6535a1cc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,6 +42,8 @@ find_package(utf8cpp REQUIRED) configure_file("src/odr/internal/project_info.cpp.in" "src/odr/internal/project_info.cpp") configure_file("src/odr/internal/project_info.hpp.in" "src/odr/internal/project_info.hpp") +configure_file("src/wvWare.h.in" "src/wvWare.h") + set(PRE_CONFIGURE_FILE "src/odr/internal/git_info.cpp.in") set(POST_CONFIGURE_FILE "${CMAKE_CURRENT_BINARY_DIR}/src/odr/internal/git_info.cpp") if (EXISTS "${PROJECT_SOURCE_DIR}/.git") diff --git a/conanfile.py b/conanfile.py index ea7f5d3c..528eb8bc 100644 --- a/conanfile.py +++ b/conanfile.py @@ -69,6 +69,8 @@ def generate(self): tc.variables["ODR_TEST"] = False tc.variables["WITH_PDF2HTMLEX"] = self.options.get_safe("with_pdf2htmlEX", False) tc.variables["WITH_WVWARE"] = self.options.get_safe("with_wvWare", False) + if self.options.get_safe("with_wvWare", False): + tc.variables["WVDATADIR"] = self.dependencies['wvware'].cpp_info.resdirs[0] tc.generate() deps = 
CMakeDeps(self) diff --git a/src/odr/internal/project_info.hpp.in b/src/odr/internal/project_info.hpp.in index d93c9d88..93d3c343 100644 --- a/src/odr/internal/project_info.hpp.in +++ b/src/odr/internal/project_info.hpp.in @@ -6,6 +6,6 @@ const char *version() noexcept; } // namespace odr::internal::project_info #cmakedefine WITH_PDF2HTMLEX 1 -#cmakedefine WITH_WVWARE 1 +#include #endif // ODR_INTERNAL_PROJECT_INFO_HPP diff --git a/src/wvWare.c b/src/wvWare.c index 556d9c83..c3f769a0 100644 --- a/src/wvWare.c +++ b/src/wvWare.c @@ -27,6 +27,8 @@ #include #include "getopt.h" +#include + #ifdef __ANDROID_API__ #include #define ParenthesesStripper(...) __VA_ARGS__ @@ -48,8 +50,6 @@ #define wvTrace( args ) #endif -char *s_WVDATADIR = NULL; -char *s_HTMLCONFIG = NULL; int documentId = 0; #define static_reinit( variable, defaultValue ) { \ @@ -1748,6 +1748,15 @@ myCharProc (wvParseStruct * ps, U16 eachchar, U8 chartype, U16 lid) return (0); } +const char *get_data_dir() +{ + const char *data_dir = getenv("WVDATADIR"); + if (NULL == data_dir) { + data_dir = WVDATADIR; + } + return data_dir; +} + int wvOpenConfig (state_data *myhandle,char *config) { @@ -1765,18 +1774,18 @@ wvOpenConfig (state_data *myhandle,char *config) if (NULL != buf) { free(buf); } - buf = strdup_and_append_twice(s_WVDATADIR, "/", config); + buf = strdup_and_append_twice(get_data_dir(), "/", config); config = buf; tmp = fopen(config, "rb"); } if (tmp == NULL) { + char * html_config = strdup_and_append_twice(get_data_dir(), "/", "wvHtml.xml"); if (i) wvError ( - ("Attempt to open %s failed, using %s\n", config, - s_HTMLCONFIG)); - config = s_HTMLCONFIG; + ("Attempt to open %s failed, using %s\n", config, html_config)); + config = html_config; tmp = fopen (config, "rb"); } myhandle->path = config; diff --git a/src/wvWare.h.in b/src/wvWare.h.in new file mode 100644 index 00000000..6758e260 --- /dev/null +++ b/src/wvWare.h.in @@ -0,0 +1,7 @@ +#ifndef WVWARE_H +#define WVWARE_H + +#cmakedefine WITH_WVWARE 
1 +#cmakedefine WVDATADIR "@WVDATADIR@" + +#endif //WVWARE_H From 9d2f359c835d92af691df0c4d2418a12167d3067 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 25 Aug 2024 17:45:35 +0300 Subject: [PATCH 39/97] Install .h headers too --- conanfile.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/conanfile.py b/conanfile.py index 528eb8bc..d2d5bcea 100644 --- a/conanfile.py +++ b/conanfile.py @@ -88,6 +88,12 @@ def package(self): src=os.path.join(self.recipe_folder, "src"), dst=os.path.join(self.export_sources_folder, "include"), ) + copy( + self, + "*.h", + src=os.path.join(self.recipe_folder, "src"), + dst=os.path.join(self.export_sources_folder, "include"), + ) cmake = CMake(self) cmake.install() From 79ac7b30fb76ee6c1d4b660bd297eb49c1364128 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 25 Aug 2024 18:09:37 +0300 Subject: [PATCH 40/97] Update header installation. Install .h headers too, not just .hpp --- CMakeLists.txt | 6 +++--- conanfile.py | 13 ------------- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6535a1cc..2b8be259 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -190,11 +190,11 @@ if(WITH_WVWARE) endif(WITH_WVWARE) add_library(odr ${ODR_SOURCE_FILES}) -target_include_directories(odr PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/src) set_target_properties(odr PROPERTIES OUTPUT_NAME odr) target_include_directories(odr PUBLIC src + ${CMAKE_CURRENT_BINARY_DIR}/src ) target_link_libraries(odr PRIVATE @@ -234,8 +234,8 @@ endif () install( DIRECTORY src/ ${CMAKE_CURRENT_BINARY_DIR}/src/ - DESTINATION include/ - FILES_MATCHING PATTERN "*.hpp" + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ + FILES_MATCHING REGEX ".*\\.(h|hpp)$" ) install( TARGETS odr meta translate back_translate diff --git a/conanfile.py b/conanfile.py index d2d5bcea..6a325002 100644 --- a/conanfile.py +++ b/conanfile.py @@ -82,19 +82,6 @@ def build(self): cmake.build() def package(self): - copy( - self, - "*.hpp", - 
src=os.path.join(self.recipe_folder, "src"), - dst=os.path.join(self.export_sources_folder, "include"), - ) - copy( - self, - "*.h", - src=os.path.join(self.recipe_folder, "src"), - dst=os.path.join(self.export_sources_folder, "include"), - ) - cmake = CMake(self) cmake.install() From c385c8c8b85fea48831aeab3bd8851043c1ed0b7 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 25 Aug 2024 18:51:25 +0300 Subject: [PATCH 41/97] Update wvWare and pdf2htmlEX wrappers --- src/odr/internal/html/pdf2htmlEX_wrapper.cpp | 8 ++------ src/odr/internal/html/pdf2htmlEX_wrapper.hpp | 2 +- src/odr/internal/html/wvWare_wrapper.cpp | 13 ++----------- src/odr/internal/html/wvWare_wrapper.hpp | 2 +- src/odr/open_document_reader.cpp | 8 ++++---- src/odr/open_document_reader.hpp | 10 +++++----- 6 files changed, 15 insertions(+), 28 deletions(-) diff --git a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp index 30fc7fa0..47c40afd 100644 --- a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp +++ b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp @@ -10,16 +10,12 @@ namespace odr::internal { -Html html::pdf2htmlEX_wrapper(const PdfFile &pdf_file, +Html html::pdf2htmlEX_wrapper(const std::string &input_path, const std::string &output_path, const HtmlConfig &config) { pdf2htmlEX::pdf2htmlEX pdf2htmlEX; - auto disk_path = pdf_file.file().disk_path(); - if (!disk_path.has_value()) { - throw FileNotFound(); - } - pdf2htmlEX.setInputFilename(disk_path.value()); + pdf2htmlEX.setInputFilename(input_path); pdf2htmlEX.setDestinationDir(output_path); auto output_file_name = "document.html"; pdf2htmlEX.setOutputFilename(output_file_name); diff --git a/src/odr/internal/html/pdf2htmlEX_wrapper.hpp b/src/odr/internal/html/pdf2htmlEX_wrapper.hpp index b577af86..b71f8b0a 100644 --- a/src/odr/internal/html/pdf2htmlEX_wrapper.hpp +++ b/src/odr/internal/html/pdf2htmlEX_wrapper.hpp @@ -12,7 +12,7 @@ class Html; namespace odr::internal::html { -Html 
pdf2htmlEX_wrapper(const PdfFile &pdf_file, const std::string &output_path, +Html pdf2htmlEX_wrapper(const std::string &input_path, const std::string &output_path, const HtmlConfig &config); } diff --git a/src/odr/internal/html/wvWare_wrapper.cpp b/src/odr/internal/html/wvWare_wrapper.cpp index c1f72059..dfc181c6 100644 --- a/src/odr/internal/html/wvWare_wrapper.cpp +++ b/src/odr/internal/html/wvWare_wrapper.cpp @@ -16,20 +16,11 @@ extern char *s_HTMLCONFIG; namespace odr::internal::html { -Html wvWare_wrapper(const File &file, const std::string &output_path, +Html wvWare_wrapper(const std::string &input_path, const std::string &output_path, const HtmlConfig &config) { - auto disk_path = file.disk_path(); - if (!disk_path.has_value()) { - throw FileNotFound(); - } - - // @TODO: getenv() - // s_WVDATADIR = NULL; - // s_HTMLCONFIG = NULL; - auto output_file_path = output_path + "/document.html"; - char *input_file_path = strdup(disk_path->c_str()); + char *input_file_path = strdup(input_path.c_str()); char *output_dir = strdup(output_path.c_str()); g_htmlOutputFileHandle = fopen(output_file_path.c_str(), "w"); diff --git a/src/odr/internal/html/wvWare_wrapper.hpp b/src/odr/internal/html/wvWare_wrapper.hpp index 28775f1f..5b1181ba 100644 --- a/src/odr/internal/html/wvWare_wrapper.hpp +++ b/src/odr/internal/html/wvWare_wrapper.hpp @@ -12,7 +12,7 @@ class Html; namespace odr::internal::html { -Html wvWare_wrapper(const File &file, const std::string &output_path, +Html wvWare_wrapper(const std::string &input_path, const std::string &output_path, const HtmlConfig &config); } diff --git a/src/odr/open_document_reader.cpp b/src/odr/open_document_reader.cpp index 0338fd3a..d2e35011 100644 --- a/src/odr/open_document_reader.cpp +++ b/src/odr/open_document_reader.cpp @@ -252,18 +252,18 @@ Html OpenDocumentReader::html(const PdfFile &pdf_file, } #if defined(WITH_PDF2HTMLEX) -Html OpenDocumentReader::pdf2htmlEX(const PdfFile &pdf_file, +Html OpenDocumentReader::pdf2htmlEX(const 
std::string &input_path, const std::string &output_path, const HtmlConfig &config) { - return internal::html::pdf2htmlEX_wrapper(pdf_file, output_path, config); + return internal::html::pdf2htmlEX_wrapper(input_path, output_path, config); } #endif #if defined(WITH_WVWARE) -Html OpenDocumentReader::wvHtml(const File &file, +Html OpenDocumentReader::wvHtml(const std::string &input_path, const std::string &output_path, const HtmlConfig &config) { - return internal::html::wvWare_wrapper(file, output_path, config); + return internal::html::wvWare_wrapper(input_path, output_path, config); } #endif diff --git a/src/odr/open_document_reader.hpp b/src/odr/open_document_reader.hpp index 78bcd0cd..e9f6b086 100644 --- a/src/odr/open_document_reader.hpp +++ b/src/odr/open_document_reader.hpp @@ -143,23 +143,23 @@ class OpenDocumentReader final { #if defined(WITH_PDF2HTMLEX) /// @brief Translates a PDF file to HTML using pdf2htmlEX. /// - /// @param pdf_file PDF file to translate. + /// @param input_path Path to the file to translate. /// @param output_path Path to save the HTML output. /// @param config Configuration for the HTML output. /// @return HTML output. - [[nodiscard]] static Html pdf2htmlEX(const PdfFile &pdf_file, + [[nodiscard]] static Html pdf2htmlEX(const std::string &input_path, const std::string &output_path, const HtmlConfig &config); #endif #if defined(WITH_WVWARE) - /// @brief Translates a file to HTML. + /// @brief Translates a doc file to HTML using wvWare. /// - /// @param file File to translate. + /// @param input_path Path to the file to translate. /// @param output_path Path to save the HTML output. /// @param config Configuration for the HTML output. /// @return HTML output. 
- [[nodiscard]] static Html wvHtml(const File &file, + [[nodiscard]] static Html wvHtml(const std::string &input_path, const std::string &output_path, const HtmlConfig &config); #endif From 79d81179d117465a12b4d970296b0b03f85785a3 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 25 Aug 2024 19:01:10 +0300 Subject: [PATCH 42/97] Formatting --- src/odr/internal/html/pdf2htmlEX_wrapper.hpp | 3 ++- src/odr/internal/html/wvWare_wrapper.cpp | 4 ++-- src/odr/internal/html/wvWare_wrapper.hpp | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/odr/internal/html/pdf2htmlEX_wrapper.hpp b/src/odr/internal/html/pdf2htmlEX_wrapper.hpp index b71f8b0a..f05a6d35 100644 --- a/src/odr/internal/html/pdf2htmlEX_wrapper.hpp +++ b/src/odr/internal/html/pdf2htmlEX_wrapper.hpp @@ -12,7 +12,8 @@ class Html; namespace odr::internal::html { -Html pdf2htmlEX_wrapper(const std::string &input_path, const std::string &output_path, +Html pdf2htmlEX_wrapper(const std::string &input_path, + const std::string &output_path, const HtmlConfig &config); } diff --git a/src/odr/internal/html/wvWare_wrapper.cpp b/src/odr/internal/html/wvWare_wrapper.cpp index dfc181c6..4fea6527 100644 --- a/src/odr/internal/html/wvWare_wrapper.cpp +++ b/src/odr/internal/html/wvWare_wrapper.cpp @@ -16,8 +16,8 @@ extern char *s_HTMLCONFIG; namespace odr::internal::html { -Html wvWare_wrapper(const std::string &input_path, const std::string &output_path, - const HtmlConfig &config) { +Html wvWare_wrapper(const std::string &input_path, + const std::string &output_path, const HtmlConfig &config) { auto output_file_path = output_path + "/document.html"; char *input_file_path = strdup(input_path.c_str()); diff --git a/src/odr/internal/html/wvWare_wrapper.hpp b/src/odr/internal/html/wvWare_wrapper.hpp index 5b1181ba..03f21489 100644 --- a/src/odr/internal/html/wvWare_wrapper.hpp +++ b/src/odr/internal/html/wvWare_wrapper.hpp @@ -12,8 +12,8 @@ class Html; namespace odr::internal::html { -Html 
wvWare_wrapper(const std::string &input_path, const std::string &output_path, - const HtmlConfig &config); +Html wvWare_wrapper(const std::string &input_path, + const std::string &output_path, const HtmlConfig &config); } From 9bcdfe7887a3160999df9303d195a11a462059c9 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 25 Aug 2024 19:28:55 +0300 Subject: [PATCH 43/97] Allow passing passwords to pdf2htmlEX and wvWare --- src/odr/internal/html/pdf2htmlEX_wrapper.cpp | 11 ++++++----- src/odr/internal/html/pdf2htmlEX_wrapper.hpp | 4 +++- src/odr/internal/html/wvWare_wrapper.cpp | 11 +++++++---- src/odr/internal/html/wvWare_wrapper.hpp | 3 ++- src/odr/open_document_reader.cpp | 12 ++++++++---- src/odr/open_document_reader.hpp | 7 +++++-- 6 files changed, 31 insertions(+), 17 deletions(-) diff --git a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp index 47c40afd..012dbe72 100644 --- a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp +++ b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp @@ -12,7 +12,8 @@ namespace odr::internal { Html html::pdf2htmlEX_wrapper(const std::string &input_path, const std::string &output_path, - const HtmlConfig &config) { + const HtmlConfig &config, + std::optional &password) { pdf2htmlEX::pdf2htmlEX pdf2htmlEX; pdf2htmlEX.setInputFilename(input_path); @@ -24,10 +25,10 @@ Html html::pdf2htmlEX_wrapper(const std::string &input_path, pdf2htmlEX.setProcessOutline(false); pdf2htmlEX.setProcessAnnotation(true); - // @TODO: - // if (options.password != null) { - // pdf2htmlEX.setOwnerPassword(options.password).setUserPassword(options.password); - // } + if (password.has_value()) { + pdf2htmlEX.setOwnerPassword(password.value()); + pdf2htmlEX.setUserPassword(password.value()); + } try { pdf2htmlEX.convert(); diff --git a/src/odr/internal/html/pdf2htmlEX_wrapper.hpp b/src/odr/internal/html/pdf2htmlEX_wrapper.hpp index f05a6d35..ace0e5ce 100644 --- a/src/odr/internal/html/pdf2htmlEX_wrapper.hpp +++ 
b/src/odr/internal/html/pdf2htmlEX_wrapper.hpp @@ -1,6 +1,7 @@ #ifndef ODR_INTERNAL_PDF2HTMLEX_WRAPPER_HPP #define ODR_INTERNAL_PDF2HTMLEX_WRAPPER_HPP +#include #include namespace odr { @@ -14,7 +15,8 @@ namespace odr::internal::html { Html pdf2htmlEX_wrapper(const std::string &input_path, const std::string &output_path, - const HtmlConfig &config); + const HtmlConfig &config, + std::optional &password); } diff --git a/src/odr/internal/html/wvWare_wrapper.cpp b/src/odr/internal/html/wvWare_wrapper.cpp index 4fea6527..dc13baf3 100644 --- a/src/odr/internal/html/wvWare_wrapper.cpp +++ b/src/odr/internal/html/wvWare_wrapper.cpp @@ -17,7 +17,8 @@ extern char *s_HTMLCONFIG; namespace odr::internal::html { Html wvWare_wrapper(const std::string &input_path, - const std::string &output_path, const HtmlConfig &config) { + const std::string &output_path, const HtmlConfig &config, + std::optional &password) { auto output_file_path = output_path + "/document.html"; char *input_file_path = strdup(input_path.c_str()); @@ -27,9 +28,11 @@ Html wvWare_wrapper(const std::string &input_path, documentId++; - // @TODO: password - std::string password; - int retVal = convert(input_file_path, output_dir, password.c_str()); + std::string password_value; + if (password.has_value()) { + password_value = password.value(); + } + int retVal = convert(input_file_path, output_dir, password_value.c_str()); free(output_dir); free(input_file_path); fclose(g_htmlOutputFileHandle); diff --git a/src/odr/internal/html/wvWare_wrapper.hpp b/src/odr/internal/html/wvWare_wrapper.hpp index 03f21489..0d9e4e06 100644 --- a/src/odr/internal/html/wvWare_wrapper.hpp +++ b/src/odr/internal/html/wvWare_wrapper.hpp @@ -13,7 +13,8 @@ class Html; namespace odr::internal::html { Html wvWare_wrapper(const std::string &input_path, - const std::string &output_path, const HtmlConfig &config); + const std::string &output_path, const HtmlConfig &config, + std::optional &password); } diff --git 
a/src/odr/open_document_reader.cpp b/src/odr/open_document_reader.cpp index d2e35011..73c9a235 100644 --- a/src/odr/open_document_reader.cpp +++ b/src/odr/open_document_reader.cpp @@ -254,16 +254,20 @@ Html OpenDocumentReader::html(const PdfFile &pdf_file, #if defined(WITH_PDF2HTMLEX) Html OpenDocumentReader::pdf2htmlEX(const std::string &input_path, const std::string &output_path, - const HtmlConfig &config) { - return internal::html::pdf2htmlEX_wrapper(input_path, output_path, config); + const HtmlConfig &config, + std::optional &password) { + return internal::html::pdf2htmlEX_wrapper(input_path, output_path, config, + password); } #endif #if defined(WITH_WVWARE) Html OpenDocumentReader::wvHtml(const std::string &input_path, const std::string &output_path, - const HtmlConfig &config) { - return internal::html::wvWare_wrapper(input_path, output_path, config); + const HtmlConfig &config, + std::optional &password) { + return internal::html::wvWare_wrapper(input_path, output_path, config, + password); } #endif diff --git a/src/odr/open_document_reader.hpp b/src/odr/open_document_reader.hpp index e9f6b086..3849389c 100644 --- a/src/odr/open_document_reader.hpp +++ b/src/odr/open_document_reader.hpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -149,7 +150,8 @@ class OpenDocumentReader final { /// @return HTML output. [[nodiscard]] static Html pdf2htmlEX(const std::string &input_path, const std::string &output_path, - const HtmlConfig &config); + const HtmlConfig &config, + std::optional &password); #endif #if defined(WITH_WVWARE) @@ -161,7 +163,8 @@ class OpenDocumentReader final { /// @return HTML output. [[nodiscard]] static Html wvHtml(const std::string &input_path, const std::string &output_path, - const HtmlConfig &config); + const HtmlConfig &config, + std::optional &password); #endif /// @brief Edit a document. 
From eab720ebeafab11a5ae4bf58320d2a9dc1f7cd69 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 25 Aug 2024 20:02:06 +0300 Subject: [PATCH 44/97] Clean-up wvWare --- CMakeLists.txt | 4 +--- src/odr/internal/html/wvWare_wrapper.cpp | 2 -- src/odr/internal/project_info.hpp.in | 2 +- src/wvWare.c | 28 +++++++++++------------- 4 files changed, 15 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b8be259..5eb63fe1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,8 +42,6 @@ find_package(utf8cpp REQUIRED) configure_file("src/odr/internal/project_info.cpp.in" "src/odr/internal/project_info.cpp") configure_file("src/odr/internal/project_info.hpp.in" "src/odr/internal/project_info.hpp") -configure_file("src/wvWare.h.in" "src/wvWare.h") - set(PRE_CONFIGURE_FILE "src/odr/internal/git_info.cpp.in") set(POST_CONFIGURE_FILE "${CMAKE_CURRENT_BINARY_DIR}/src/odr/internal/git_info.cpp") if (EXISTS "${PROJECT_SOURCE_DIR}/.git") @@ -235,7 +233,7 @@ endif () install( DIRECTORY src/ ${CMAKE_CURRENT_BINARY_DIR}/src/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ - FILES_MATCHING REGEX ".*\\.(h|hpp)$" + FILES_MATCHING PATTERN ".hpp" ) install( TARGETS odr meta translate back_translate diff --git a/src/odr/internal/html/wvWare_wrapper.cpp b/src/odr/internal/html/wvWare_wrapper.cpp index dc13baf3..48c2998e 100644 --- a/src/odr/internal/html/wvWare_wrapper.cpp +++ b/src/odr/internal/html/wvWare_wrapper.cpp @@ -10,8 +10,6 @@ extern "C" { int convert(char *inputFile, char *outputDir, const char *password); extern int no_graphics; extern int documentId; -extern char *s_WVDATADIR; -extern char *s_HTMLCONFIG; } namespace odr::internal::html { diff --git a/src/odr/internal/project_info.hpp.in b/src/odr/internal/project_info.hpp.in index 93d3c343..d93c9d88 100644 --- a/src/odr/internal/project_info.hpp.in +++ b/src/odr/internal/project_info.hpp.in @@ -6,6 +6,6 @@ const char *version() noexcept; } // namespace odr::internal::project_info #cmakedefine 
WITH_PDF2HTMLEX 1 -#include +#cmakedefine WITH_WVWARE 1 #endif // ODR_INTERNAL_PROJECT_INFO_HPP diff --git a/src/wvWare.c b/src/wvWare.c index c3f769a0..0b653702 100644 --- a/src/wvWare.c +++ b/src/wvWare.c @@ -27,8 +27,6 @@ #include #include "getopt.h" -#include - #ifdef __ANDROID_API__ #include #define ParenthesesStripper(...) __VA_ARGS__ @@ -1748,15 +1746,6 @@ myCharProc (wvParseStruct * ps, U16 eachchar, U8 chartype, U16 lid) return (0); } -const char *get_data_dir() -{ - const char *data_dir = getenv("WVDATADIR"); - if (NULL == data_dir) { - data_dir = WVDATADIR; - } - return data_dir; -} - int wvOpenConfig (state_data *myhandle,char *config) { @@ -1770,21 +1759,30 @@ wvOpenConfig (state_data *myhandle,char *config) if(tmp == NULL) { + const char *wv_data_dir = getenv("WVDATADIR"); + if (NULL == wv_data_dir) { + wvError (("Env var WVDATADIR unset!")); + return 0; + } static char * buf = NULL; if (NULL != buf) { free(buf); } - buf = strdup_and_append_twice(get_data_dir(), "/", config); + buf = strdup_and_append_twice(wv_data_dir, "/", config); config = buf; tmp = fopen(config, "rb"); } if (tmp == NULL) { - char * html_config = strdup_and_append_twice(get_data_dir(), "/", "wvHtml.xml"); + const char *wv_data_dir = getenv("WVDATADIR"); + if (NULL == wv_data_dir) { + wvError (("Env var WVDATADIR unset!")); + return 0; + } + char * html_config = strdup_and_append_twice(wv_data_dir, "/", "wvHtml.xml"); if (i) - wvError ( - ("Attempt to open %s failed, using %s\n", config, html_config)); + wvError (("Attempt to open %s failed, using %s\n", config, html_config)); config = html_config; tmp = fopen (config, "rb"); } From 97999782c4c268752ca1e1a522deb3c2bf4c59a4 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 25 Aug 2024 20:08:11 +0300 Subject: [PATCH 45/97] More wvWare cleanup --- conanfile.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/conanfile.py b/conanfile.py index 6a325002..5c19a25f 100644 --- a/conanfile.py +++ b/conanfile.py @@ -69,8 +69,6 @@ 
def generate(self): tc.variables["ODR_TEST"] = False tc.variables["WITH_PDF2HTMLEX"] = self.options.get_safe("with_pdf2htmlEX", False) tc.variables["WITH_WVWARE"] = self.options.get_safe("with_wvWare", False) - if self.options.get_safe("with_wvWare", False): - tc.variables["WVDATADIR"] = self.dependencies['wvware'].cpp_info.resdirs[0] tc.generate() deps = CMakeDeps(self) From 2f9389ac183508f6a36880c15330ab01eef90b07 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 25 Aug 2024 20:09:29 +0300 Subject: [PATCH 46/97] More wvWare cleanup --- src/wvWare.h.in | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 src/wvWare.h.in diff --git a/src/wvWare.h.in b/src/wvWare.h.in deleted file mode 100644 index 6758e260..00000000 --- a/src/wvWare.h.in +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef WVWARE_H -#define WVWARE_H - -#cmakedefine WITH_WVWARE 1 -#cmakedefine WVDATADIR "@WVDATADIR@" - -#endif //WVWARE_H From f9bdad4c5bac2da9877d9125de18b95140e4e027 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 25 Aug 2024 20:19:11 +0300 Subject: [PATCH 47/97] Fix header install --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5eb63fe1..55aaf0d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -232,8 +232,8 @@ endif () install( DIRECTORY src/ ${CMAKE_CURRENT_BINARY_DIR}/src/ - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ - FILES_MATCHING PATTERN ".hpp" + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + FILES_MATCHING PATTERN "*.hpp" ) install( TARGETS odr meta translate back_translate From 19f2a64e93a6b97ab37f468d5dc48fc74fe45481 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Mon, 26 Aug 2024 13:32:33 +0300 Subject: [PATCH 48/97] Update with_pdf2htmlEX and with_wvWare options, allow, but default False on Windows --- conanfile.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/conanfile.py b/conanfile.py index 5c19a25f..e4675db8 100644 --- a/conanfile.py +++ 
b/conanfile.py @@ -25,8 +25,6 @@ class OpenDocumentCoreConan(ConanFile): default_options = { "shared": False, "fPIC": True, - "with_pdf2htmlEX": True, - "with_wvWare": True, } def requirements(self): @@ -54,10 +52,11 @@ def validate_build(self): def config_options(self): if self.settings.os == "Windows": del self.options.fPIC - # @TODO: ideally Windows should just default_options['with_pdf2htmlEX'] = False - # But by the time config_options() is executed, default_options is already done parsed. - del self.options.with_pdf2htmlEX - del self.options.with_wvWare + self.options.with_pdf2htmlEX = False + self.options.with_wvWare = False + else: + self.options.with_pdf2htmlEX = True + self.options.with_wvWare = True def configure(self): if self.options.shared: From ae8acdc9ed7b934cfb81aa062dcc6c4da9062c0c Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Mon, 26 Aug 2024 23:06:48 +0300 Subject: [PATCH 49/97] Move wvWare.c to wvWare conan package --- CMakeLists.txt | 1 - src/odr/internal/html/wvWare_wrapper.cpp | 4 +- src/wvWare.c | 1928 ---------------------- 3 files changed, 1 insertion(+), 1932 deletions(-) delete mode 100644 src/wvWare.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 55aaf0d9..ce033240 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -183,7 +183,6 @@ endif(WITH_PDF2HTMLEX) if(WITH_WVWARE) LIST(APPEND ODR_SOURCE_FILES "src/odr/internal/html/wvWare_wrapper.cpp" - "src/wvWare.c" ) endif(WITH_WVWARE) diff --git a/src/odr/internal/html/wvWare_wrapper.cpp b/src/odr/internal/html/wvWare_wrapper.cpp index 48c2998e..29e847f7 100644 --- a/src/odr/internal/html/wvWare_wrapper.cpp +++ b/src/odr/internal/html/wvWare_wrapper.cpp @@ -24,13 +24,11 @@ Html wvWare_wrapper(const std::string &input_path, g_htmlOutputFileHandle = fopen(output_file_path.c_str(), "w"); - documentId++; - std::string password_value; if (password.has_value()) { password_value = password.value(); } - int retVal = convert(input_file_path, output_dir, password_value.c_str()); + int 
retVal = wvHtml_convert(input_file_path, output_dir, password_value.c_str()); free(output_dir); free(input_file_path); fclose(g_htmlOutputFileHandle); diff --git a/src/wvWare.c b/src/wvWare.c deleted file mode 100644 index 0b653702..00000000 --- a/src/wvWare.c +++ /dev/null @@ -1,1928 +0,0 @@ -/* wvWare - * Copyright (C) Caolan McNamara, Dom Lachowicz, and others - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - * 02111-1307, USA. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include "getopt.h" - -#ifdef __ANDROID_API__ -#include -#define ParenthesesStripper(...) 
__VA_ARGS__ - -#ifdef wvError -#undef wvError -#endif -#define wvError( args ) __android_log_print(ANDROID_LOG_ERROR, "wv", ParenthesesStripper args); - -#ifdef wvWarning -#undef wvWarning -#endif -#define wvWarning( args ) __android_log_print(ANDROID_LOG_WARN, "wv", args); - -#ifdef wvTrace -#undef wvTrace -#endif -//#define wvTrace( args ) __android_log_print(ANDROID_LOG_VERBOSE, "wv", ParenthesesStripper args); -#define wvTrace( args ) -#endif - -int documentId = 0; - -#define static_reinit( variable, defaultValue ) { \ - static int staticVarValue = 0; \ - if (staticVarValue != documentId) { \ - variable = defaultValue; \ - staticVarValue = documentId; \ - } \ -} - -char *strdup_and_append_twice(const char *a, const char *b, const char *c) { - const size_t szA = strlen(a); - const size_t szB = strlen(b); - const size_t szC = strlen(c); - - char *buf = malloc(szA + szB + szC + 1); - strcpy(buf, a); - strcpy(buf + szA, b); - strcpy(buf + szA + szB, c); - buf[szA + szB + szC] = '\0'; - return buf; -} - -/* -Released under GPL, written by Caolan.McNamara@ul.ie. - -Copyright (C) 1998,1999 - Caolan McNamara - -Real Life: Caolan McNamara * Doing: MSc in HCI -Work: Caolan.McNamara@ul.ie * Phone: +353-86-8790257 -URL: http://skynet.csn.ul.ie/~caolan * Sig: an oblique strategy -How would you have done it? 
-*/ - -/* -returns 1 for not an ole doc -2 ole but not word doc --1 for an error of some unknown kind -0 on success -*/ - -char *config = "wvHtml.xml"; - -/* flags for -X / --xml option */ -int xml_output = 0; -extern char *xml_slash; - -/* flag for disabling graphics */ -int no_graphics = 0; - -int myelehandler (wvParseStruct * ps, wvTag tag, void *props, int dirty); -int mydochandler (wvParseStruct * ps, wvTag tag); -int myCharProc (wvParseStruct * ps, U16 eachchar, U8 chartype, U16 lid); -int mySpecCharProc (wvParseStruct * ps, U16 eachchar, CHP * achp); - -int wvOpenConfig (state_data *myhandle,char *config); - -char * wv_arg_basename = 0; -char * figure_name (wvParseStruct * ps); -char * name_to_url (char * name); - -char wv_cwd[4097]; - -int HandleBitmap (wvParseStruct * ps, char *name, BitmapBlip * bitmap); -int HandleMetafile (wvParseStruct * ps, char *name, MetaFileBlip * bitmap); - -/* should really be a config.h decl for having strdup, but... */ -#ifdef __MWERKS__ -char * -strdup (const char *text) -{ - char *buf; - size_t len; - - len = strlen (text) + 1; - buf = (char *) wvMalloc (len); - memcpy (buf, text, len); - - return buf; -} - - -#endif - -char * -wvHtmlGraphic (wvParseStruct * ps, Blip * blip) -{ - char *name; - wvStream * fd; - char test[3]; - - name = figure_name (ps); - if (name == 0) return (0); - - /* - temp hack to test older included bmps in word 6 and 7, - should be wrapped in a modern escher strucure before getting - to here, and then handled as normal - */ - wvTrace (("type is %d\n", blip->type)); - switch (blip->type) - { - case msoblipJPEG: - case msoblipDIB: - case msoblipPNG: - fd = (blip->blip.bitmap.m_pvBits); - test[2] = '\0'; - test[0] = read_8ubit (fd); - - test[1] = read_8ubit (fd); - wvStream_rewind (fd); - if (!(strcmp (test, "BM"))) - { - wvAppendStr (&name, ".bmp"); - if (0 != HandleBitmap (ps, name, &blip->blip.bitmap)) - return (NULL); - return (name); - } - default: - break; - } - - switch (blip->type) - { - case 
msoblipWMF: - wvAppendStr (&name, ".wmf"); - if (0 != HandleMetafile (ps, name, &blip->blip.metafile)) - return (NULL); - break; - case msoblipEMF: - wvAppendStr (&name, ".emf"); - if (0 != HandleMetafile (ps, name, &blip->blip.metafile)) - return (NULL); - break; - case msoblipPICT: - wvAppendStr (&name, ".pict"); - if (0 != HandleMetafile (ps, name, &blip->blip.metafile)) - return (NULL); - break; - case msoblipJPEG: - wvAppendStr (&name, ".jpg"); - if (0 != HandleBitmap (ps, name, &blip->blip.bitmap)) - return (NULL); - break; - case msoblipDIB: - wvAppendStr (&name, ".dib"); - if (0 != HandleBitmap (ps, name, &blip->blip.bitmap)) - return (NULL); - break; - case msoblipPNG: - wvAppendStr (&name, ".png"); - if (0 != HandleBitmap (ps, name, &blip->blip.bitmap)) - return (NULL); - break; - } - return (name); -} - - -int -HandleBitmap (wvParseStruct * ps, char *name, BitmapBlip * bitmap) -{ - wvStream * pwv = bitmap->m_pvBits; - FILE *fd = NULL; - size_t size = 0, i; - - if (ps->dir) chdir (ps->dir); - fd = fopen (name, "wb"); - if (ps->dir) chdir (wv_cwd); - if (fd == NULL) - { - fprintf (stderr,"\nCannot open %s for writing\n",name); - exit (1); - } - size = wvStream_size (pwv); - wvStream_rewind(pwv); - - for (i = 0; i < size; i++) - fputc (read_8ubit(pwv), fd); - fclose (fd); - wvTrace (("Name is %s\n", name)); - return (0); -} - - -int -HandleMetafile (wvParseStruct * ps, char *name, MetaFileBlip * bitmap) -{ - wvStream * pwv = bitmap->m_pvBits; - FILE *fd = NULL; - size_t size = 0, i; - U8 decompressf = 0; - - if (ps->dir) chdir (ps->dir); - fd = fopen (name, "wb"); - if (ps->dir) chdir (wv_cwd); - if (fd == NULL) - { - fprintf (stderr,"\nCannot open %s for writing\n",name); - exit (1); - } - size = wvStream_size (pwv); - wvStream_rewind(pwv); - - if (bitmap->m_fCompression == msocompressionDeflate) - decompressf = setdecom (); - - if ( !decompressf) - { - for (i = 0; i < size; i++) - fputc (read_8ubit(pwv), fd); - } - else /* decompress here */ - { - FILE 
*tmp = tmpfile (); - FILE *out = tmpfile (); - - for (i = 0; i < size; i++) - fputc (read_8ubit(pwv), tmp); - - rewind (tmp); - decompress (tmp, out, bitmap->m_cbSave, bitmap->m_cb); - fclose (tmp); - - rewind(out); - - for (i = 0; i < bitmap->m_cb; i++) - fputc ( fgetc(out), fd); - - fclose(out); - - } - - fclose (fd); - wvTrace (("Name is %s\n", name)); - return (0); -} - -static void -do_version (void) -{ - /* todo: initialize this in a configure script */ - printf ("wvWare %s\n", VERSION); -} - -static void -do_help (void) -{ - do_version (); - printf ("Usage: wvWare [OPTION...] filename.doc\n"); - printf ("\nCommon Options:\n"); - printf (" -x --config=config.xml\tSpecify an output filter to use\n"); - printf (" -c --charset=charset\t\tSpecify an iconv charset encoding\n"); - printf (" -p --password=password\tSpecify password for encrypted\n\t\t\t\tWord Documents\n"); - printf (" -d --dir=dir\t\t\tDIR\n"); - printf (" -b --basename=name\t\tUse name as base name of image files\n"); - printf (" -a --auto-eps=fmt\t\tQuery support for conversion of fmt to eps\n"); - printf (" -s --suppress=fmt\t\tDon't convert fmt to eps\n"); - printf (" -X --xml\t\t\tXML output\n"); - printf (" -1 --nographics\t\tno 0x01 graphics output\n"); - printf (" -v --version\t\t\tPrint wvWare's version number\n"); - printf (" -? --help\t\t\tPrint this help message\n"); - printf - ("\nwvWare is a suite of applications that converts Microsoft Word Documents\n"); - printf - ("(versions 2,5,6,7,8,9) into more \"useful\" formats such as HTML, LaTeX,\n"); - printf - ("ABW, WML, Text, etc... 
wvWare is also a library which can be used by\n"); - printf - ("other applications to import (and soon export) Word documents.\n\n"); - printf ("Authors:\nDom Lachowicz (dominicl@seas.upenn.edu)\n"); - printf ("Caolan McNamara (original author)\nVisit http://www.wvware.com\n"); -} - -static void wv_query_eps (const char* format); -static void wv_suppress (const char* format); - -char *charset = "utf-8"; - -#if 0 -int -main (int argc, char **argv) -{ - FILE *input; - char *password = NULL; - char *dir = NULL; - int ret; - state_data myhandle; - expand_data expandhandle; - wvParseStruct ps; - int c, index = 0; - static struct option long_options[] = { - {"charset", 1, 0, 'c'}, - {"config", 1, 0, 'x'}, - {"password", 1, 0, 'p'}, - {"dir", 1, 0, 'd'}, - {"basename", 1, 0, 'b'}, - {"auto-eps", 1, 0, 'a'}, - {"suppress", 1, 0, 's'}, - {"version", 0, 0, 'v'}, - {"help", 0, 0, '?'}, - {"xml", 0, 0, 'X'}, - {"nographics", 0, 0, '1'}, - {0, 0, 0, 0} - }; - - if (argc < 2) - { - do_help (); - exit (-1); - } - - while (1) - { - c = getopt_long (argc, argv, "?vc:x:p:d:b:a:s:X1", long_options, &index); - if (c == -1) - break; - switch (c) - { - case '?': - do_help (); - return 0; - case 'v': - do_version (); - return 0; - case 'c': - if (optarg) - charset = optarg; - else - wvError (("No argument given to charset")); - break; - case 'x': - if (optarg) - config = optarg; - else - wvError (("No config file given to config option")); - break; - case 'p': - if (optarg) - password = optarg; - else - wvError (("No password given to password option")); - break; - case 'd': - if (optarg) - dir = optarg; - else - wvError (("No directory given to dir option")); - break; - case 'b': - if (optarg) - wv_arg_basename = optarg; - else - wvError (("No name given to basename option")); - break; - case 'a': - wv_query_eps (optarg); - return 0; - case 's': - wv_suppress (optarg); - break; - - case 'X': - config = "wvXml.xml"; - charset = "utf-8"; - xml_output = 1; - xml_slash = " /"; - break; - - 
case '1': - no_graphics = 1; - break; - - default: - do_help (); - return -1; - } - } - - if (optind >= argc) - { - fprintf (stderr, "No file name given to open\n"); - return (-1); - } - -#if 0 - input = fopen (argv[optind], "rb"); - if (!input) - { - fprintf (stderr, "Failed to open %s\n", argv[optind]); - return (-1); - } - fclose (input); -#endif - - getcwd (wv_cwd,4096); - wv_cwd[4096] = 0; - - wvInit (); - ret = wvInitParser (&ps, argv[optind]); - ps.filename = argv[optind]; - ps.dir = dir; - - if (ret & 0x8000) /* Password protected? */ - { - if ((ret & 0x7fff) == WORD8) - { - ret = 0; - if (password == NULL) - { - fprintf (stderr, - "Password required, this is an encrypted document\n"); - return (-1); - } - else - { - wvSetPassword (password, &ps); - if (wvDecrypt97 (&ps)) - { - wvError (("Incorrect Password\n")); - return (-1); - } - } - } - else if (((ret & 0x7fff) == WORD7) || ((ret & 0x7fff) == WORD6)) - { - ret = 0; - if (password == NULL) - { - fprintf (stderr, - "Password required, this is an encrypted document\n"); - return (-1); - } - else - { - wvSetPassword (password, &ps); - if (wvDecrypt95 (&ps)) - { - wvError (("Incorrect Password\n")); - return (-1); - } - } - } - } - - if (ret) - { - wvError (("startup error #%d\n", ret)); - wvOLEFree (&ps); - return (-1); - } - - wvSetElementHandler (&ps, myelehandler); - wvSetDocumentHandler (&ps, mydochandler); - wvSetCharHandler (&ps, myCharProc); - wvSetSpecialCharHandler (&ps, mySpecCharProc); - - wvInitStateData (&myhandle); - - if (wvOpenConfig (&myhandle,config) == 0) - { - wvError (("config file not found\n")); - return (-1); - } - else - { - wvTrace (("x for FILE is %x\n", myhandle.fp)); - ret = wvParseConfig (&myhandle); - } - - if (!ret) - { - expandhandle.sd = &myhandle; - ps.userData = &expandhandle; - ret = wvHtml (&ps); - } - wvReleaseStateData (&myhandle); - - if (ret == 2) - return (2); - else if (ret != 0) - ret = -1; - wvOLEFree (&ps); - wvShutdown (); - - return (ret); -} -#endif - -int 
convert(char *inputFile, char *outputDir, const char *password) { - int ret; - state_data myhandle; - expand_data expandhandle; - wvParseStruct ps; - - config = "wvHtml.xml"; - - getcwd (wv_cwd,4096); - wv_cwd[4096] = 0; - - wvInit (); - ret = wvInitParser (&ps, inputFile); - ps.dir = outputDir; - - if (ret & 0x8000) /* Password protected? */ - { - if ((ret & 0x7fff) == WORD8) - { - ret = 0; - if (password == NULL || password[0] == '\0') - { - fprintf (stderr, - "Password required, this is an encrypted document\n"); - return 100; - } - else - { - wvSetPassword (password, &ps); - if (wvDecrypt97 (&ps)) - { - wvError (("Incorrect Password\n")); - return 101; - } - } - } - else if (((ret & 0x7fff) == WORD7) || ((ret & 0x7fff) == WORD6)) - { - ret = 0; - if (password == NULL || password[0] == '\0') - { - fprintf (stderr, - "Password required, this is an encrypted document\n"); - return 100; - } - else - { - wvSetPassword (password, &ps); - if (wvDecrypt95 (&ps)) - { - wvError (("Incorrect Password\n")); - return 101; - } - } - } - } - - if (ret) - { - wvError (("startup error #%d\n", ret)); - wvOLEFree (&ps); - return (-1); - } - - wvSetElementHandler (&ps, myelehandler); - wvSetDocumentHandler (&ps, mydochandler); - wvSetCharHandler (&ps, myCharProc); - wvSetSpecialCharHandler (&ps, mySpecCharProc); - - wvInitStateData (&myhandle); - - if (wvOpenConfig (&myhandle,config) == 0) - { - wvError (("config file not found\n")); - return (-1); - } - else - { - wvTrace (("x for FILE is %x\n", myhandle.fp)); - ret = wvParseConfig (&myhandle); - } - - if (!ret) - { - expandhandle.sd = &myhandle; - ps.userData = &expandhandle; - ret = wvHtml (&ps); - } - wvReleaseStateData (&myhandle); - - if (ret == 2) - return (2); - else if (ret != 0) - ret = -1; - wvOLEFree (&ps); - wvShutdown (); - - return (ret); -} - -int -myelehandler (wvParseStruct * ps, wvTag tag, void *props, int dirty) -{ - static PAP *ppap; - static_reinit(ppap, NULL) - - expand_data *data = (expand_data *) 
ps->userData; - data->anSttbfAssoc = &ps->anSttbfAssoc; - data->lfo = &ps->lfo; - data->lfolvl = ps->lfolvl; - data->lvl = ps->lvl; - data->nolfo = &ps->nolfo; - data->nooflvl = &ps->nooflvl; - data->stsh = &ps->stsh; - data->lst = &ps->lst; - data->noofLST = &ps->noofLST; - data->liststartnos = &ps->liststartnos; - data->listnfcs = &ps->listnfcs; - data->finallvl = &ps->finallvl; - data->fib = &ps->fib; - data->dop = &ps->dop; - data->intable = &ps->intable; - data->cellbounds = &ps->cellbounds; - data->nocellbounds = &ps->nocellbounds; - data->endcell = &ps->endcell; - data->vmerges = &ps->vmerges; - data->norows = &ps->norows; - data->nextpap = &ps->nextpap; - if (charset == NULL) - { - data->charset = wvAutoCharset (ps); - charset = data->charset; - } - else - data->charset = charset; - data->props = props; - - switch (tag) - { - case PARABEGIN: - { - S16 tilfo = 0; - /* test begin */ - if (*(data->endcell)) - { - tilfo = ((PAP *) (data->props))->ilfo; - ((PAP *) (data->props))->ilfo = 0; - } - /* test end */ - ppap = (PAP *) data->props; - wvTrace ( - ("fore back is %d %d\n", - ((PAP *) (data->props))->shd.icoFore, - ((PAP *) (data->props))->shd.icoBack)); - wvBeginPara (data); - if (tilfo) - ((PAP *) (data->props))->ilfo = tilfo; - } - break; - case PARAEND: - { - S16 tilfo = 0; - /* test begin */ - if (*(data->endcell)) - { - tilfo = ((PAP *) (data->props))->ilfo; - ((PAP *) (data->props))->ilfo = 0; - } - /* test end */ - wvEndCharProp (data); /* danger will break in the future */ - wvEndPara (data); - if (tilfo) - ((PAP *) (data->props))->ilfo = tilfo; - wvCopyPAP (&data->lastpap, (PAP *) (data->props)); - } - break; - case CHARPROPBEGIN: - wvBeginCharProp (data, ppap); - break; - case CHARPROPEND: - wvEndCharProp (data); - break; - case SECTIONBEGIN: - wvBeginSection (data); - break; - case SECTIONEND: - wvEndSection (data); - break; - case COMMENTBEGIN: - wvBeginComment (data); - break; - case COMMENTEND: - wvEndComment (data); - break; - default: - 
break; - } - return (0); -} - -int -mydochandler (wvParseStruct * ps, wvTag tag) -{ - static int i; - static_reinit(i, 0) - expand_data *data = (expand_data *) ps->userData; - data->anSttbfAssoc = &ps->anSttbfAssoc; - data->lfo = &ps->lfo; - data->lfolvl = ps->lfolvl; - data->lvl = ps->lvl; - data->nolfo = &ps->nolfo; - data->nooflvl = &ps->nooflvl; - data->stsh = &ps->stsh; - data->lst = &ps->lst; - data->noofLST = &ps->noofLST; - data->liststartnos = &ps->liststartnos; - data->listnfcs = &ps->listnfcs; - data->finallvl = &ps->finallvl; - data->fib = &ps->fib; - data->dop = &ps->dop; - data->intable = &ps->intable; - data->cellbounds = &ps->cellbounds; - data->nocellbounds = &ps->nocellbounds; - data->endcell = &ps->endcell; - data->vmerges = &ps->vmerges; - data->norows = &ps->norows; - if (i == 0) - { - wvSetEntityConverter (data); - data->filename = ps->filename; - data->whichcell = 0; - data->whichrow = 0; - data->asep = NULL; - i++; - wvInitPAP (&data->lastpap); - data->nextpap = NULL; - data->ps = ps; - } - - if (charset == NULL) - { - data->charset = wvAutoCharset (ps); - charset = data->charset; - } - else - data->charset = charset; - - switch (tag) - { - case DOCBEGIN: - wvBeginDocument (data); - break; - case DOCEND: - wvEndDocument (data); - break; - default: - break; - } - return (0); -} - -void -wvStrangeNoGraphicData (char *config, int graphicstype) -{ - wvError (("Strange No Graphic Data in the 0x01/0x08 graphic\n")); - - if ((strstr (config, "wvLaTeX.xml") != NULL) - || (strstr (config, "wvCleanLaTeX.xml") != NULL)) - printf - ("\n\\resizebox*{\\baselineskip}{!}{\\includegraphics{placeholder.eps}}\ - \n-- %#.2x graphic: StrangeNoGraphicData --", - graphicstype); - else - printf ("\"%#.2x", graphicstype, - "StrangeNoGraphicData", xml_slash, xml_slash); - return; -} - -/* routines for conversion from WMF to EPS or PNG using libwmf(2) library. 
- */ -int wv_wmfRead (void *); -int wv_wmfSeek (void *, long); -long wv_wmfTell (void *); - -void wvConvert_WMF_to_EPS (int, int, char **); -void wvConvert_WMF_to_PNG (int, int, char **); -void wvConvert_PNG_to_EPS (int, int, char **); -void wvConvert_JPG_to_EPS (int, int, char **); - -int -wv_wmfRead (void *context) -{ - return (fgetc ((FILE *) context)); -} - -int -wv_wmfSeek (void *context, long position) -{ - return (fseek ((FILE *) context, position, SEEK_SET)); -} - -long -wv_wmfTell (void *context) -{ - return (ftell ((FILE *) context)); -} - -#ifdef HAVE_LIBWMF - -#include -#include -#ifdef HAVE_LIBWMF_FOREIGN_H -#include -#endif - -#endif /* HAVE_LIBWMF */ - -void -wvConvert_WMF_to_EPS (int width, int height, char **source) -{ -#ifdef HAVE_LIBWMF - FILE *in = 0; - FILE *out = 0; - - char *sink = 0; - - unsigned long flags; - - wmf_error_t err; - - wmf_eps_t *ddata = 0; - - wmfAPI *API = 0; - - wmfAPI_Options api_options; - - wmfD_Rect bbox; - - in = fopen (*source, "rb"); - - if (in == 0) - return; - - sink = strdup (*source); - - remove_suffix (sink, ".wmf"); - wvAppendStr (&sink, ".eps"); - - out = fopen (sink, "wb"); - - if (out == 0) - { - wvFree (sink); - fclose (in); - return; - } - - flags = WMF_OPT_IGNORE_NONFATAL | WMF_OPT_FUNCTION; - api_options.function = wmf_eps_function; - - err = wmf_api_create (&API, flags, &api_options); - if (err != wmf_E_None) - goto _wmf_error; - - ddata = WMF_EPS_GetData (API); - - err = wmf_bbuf_input (API, wv_wmfRead, wv_wmfSeek, wv_wmfTell, (void *) in); - if (err != wmf_E_None) - goto _wmf_error; - - err = wmf_scan (API, 0, &bbox); - if (err != wmf_E_None) - goto _wmf_error; - - ddata->out = wmf_stream_create (API,out); - if (out == 0) - goto _wmf_error; - - ddata->bbox = bbox; - - ddata->eps_width = width; - ddata->eps_height = height; - - err = wmf_play (API, 0, &bbox); - if (err != wmf_E_None) - goto _wmf_error; - - wmf_api_destroy (API); - - fclose (in); - fclose (out); - - *source = sink; - - return; - - 
_wmf_error: - if (API) - wmf_api_destroy (API); - - fclose (in); - fclose (out); - - wvFree (sink); -#endif /* HAVE_LIBWMF */ -} - -#ifdef HAVE_LIBWMF - -#include -#include - -#endif /* HAVE_LIBWMF */ - -void -wvConvert_WMF_to_PNG (int width, int height, char **source) -{ -#ifdef HAVE_LIBWMF - FILE *in = 0; - FILE *out = 0; - - char *sink = 0; - - unsigned long flags; - - wmf_error_t err; - - wmf_gd_t *ddata = 0; - - wmfAPI *API = 0; - - wmfAPI_Options api_options; - - wmfD_Rect bbox; - - in = fopen (*source, "rb"); - - if (in == 0) - return; - - sink = strdup (*source); - - remove_suffix (sink, ".wmf"); - wvAppendStr (&sink, ".png"); - - out = fopen (sink, "wb"); - - if (out == 0) - { - wvFree (sink); - fclose (in); - return; - } - - flags = WMF_OPT_IGNORE_NONFATAL | WMF_OPT_FUNCTION; - api_options.function = wmf_gd_function; - - err = wmf_api_create (&API, flags, &api_options); - if (err != wmf_E_None) - goto _wmf_error; - - ddata = WMF_GD_GetData (API); - if ((ddata->flags & WMF_GD_SUPPORTS_PNG) == 0) - goto _wmf_error; - - err = wmf_bbuf_input (API, wv_wmfRead, wv_wmfSeek, wv_wmfTell, (void *) in); - if (err != wmf_E_None) - goto _wmf_error; - - err = wmf_scan (API, 0, &bbox); - if (err != wmf_E_None) - goto _wmf_error; - - ddata->type = wmf_gd_png; - - ddata->flags |= WMF_GD_OUTPUT_FILE; - ddata->file = out; - - ddata->bbox = bbox; - - ddata->width = width; - ddata->height = height; - - err = wmf_play (API, 0, &bbox); - if (err != wmf_E_None) - goto _wmf_error; - - wmf_api_destroy (API); - - fclose (in); - fclose (out); - - *source = sink; - - return; - - _wmf_error: - if (API) - wmf_api_destroy (API); - - fclose (in); - fclose (out); - - wvFree (sink); -#endif /* HAVE_LIBWMF */ -} - -void -wvConvert_PNG_to_EPS (int width, int height, char **source) -{ -#ifdef HAVE_LIBWMF_FOREIGN_H - FILE *in = 0; - FILE *out = 0; - - char *sink = 0; - - unsigned long flags; - - wmf_error_t err; - - wmf_foreign_t *ddata = 0; - - wmfAPI *API = 0; - - wmfAPI_Options api_options; 
- - wmfImage image; - - flags = WMF_OPT_IGNORE_NONFATAL | WMF_OPT_FUNCTION; - api_options.function = wmf_foreign_function; - - err = wmf_api_create (&API, flags, &api_options); - if (err != wmf_E_None) - return; - - ddata = WMF_FOREIGN_GetData (API); - - if ((ddata->flags & WMF_FOREIGN_SUPPORTS_PNG) == 0) - { - wmf_api_destroy (API); - return; - } - - in = fopen (*source, "rb"); - - if (in == 0) - { - wmf_api_destroy (API); - return; - } - - if (wmf_image_load_png (API,in,&image) == (-1)) - { - fclose (in); - wmf_api_destroy (API); - return; - } - - fclose (in); - - sink = strdup (*source); - - remove_suffix (sink, ".png"); - wvAppendStr (&sink, ".eps"); - - out = fopen (sink, "wb"); - - if (out == 0) - { - wvFree (sink); - wmf_image_free (API,&image); - wmf_api_destroy (API); - return; - } - - wmf_image_save_eps (API,out,&image); - - fclose (out); - - wmf_image_free (API,&image); - wmf_api_destroy (API); - - *source = sink; - - return; -#endif /* HAVE_LIBWMF_FOREIGN_H */ -} - -void -wvConvert_JPG_to_EPS (int width, int height, char **source) -{ -#ifdef HAVE_LIBWMF_FOREIGN_H - FILE *in = 0; - FILE *out = 0; - - char *sink = 0; - - unsigned long flags; - - wmf_error_t err; - - wmf_foreign_t *ddata = 0; - - wmfAPI *API = 0; - - wmfAPI_Options api_options; - - wmfImage image; - - flags = WMF_OPT_IGNORE_NONFATAL | WMF_OPT_FUNCTION; - api_options.function = wmf_foreign_function; - - err = wmf_api_create (&API, flags, &api_options); - if (err != wmf_E_None) - return; - - ddata = WMF_FOREIGN_GetData (API); - - if ((ddata->flags & WMF_FOREIGN_SUPPORTS_JPEG) == 0) - { - wmf_api_destroy (API); - return; - } - - in = fopen (*source, "rb"); - - if (in == 0) - { - wmf_api_destroy (API); - return; - } - - if (wmf_image_load_jpg (API,in,&image) == (-1)) - { - fclose (in); - wmf_api_destroy (API); - return; - } - - fclose (in); - - sink = strdup (*source); - - remove_suffix (sink, ".jpg"); - wvAppendStr (&sink, ".eps"); - - out = fopen (sink, "wb"); - - if (out == 0) - { - wvFree 
(sink); - wmf_image_free (API,&image); - wmf_api_destroy (API); - return; - } - - wmf_image_save_eps (API,out,&image); - - fclose (out); - - wmf_image_free (API,&image); - wmf_api_destroy (API); - - *source = sink; - - return; -#endif /* HAVE_LIBWMF_FOREIGN_H */ -} - -static void wv_query_eps (const char* format) -{ -#ifdef HAVE_LIBWMF - unsigned long flags; - - wmf_error_t err; -#ifdef HAVE_LIBWMF_FOREIGN_H - wmf_foreign_t *ddata = 0; -#endif /* HAVE_LIBWMF_FOREIGN_H */ - wmfAPI* API = 0; - wmfAPI_Options api_options; -#endif /* HAVE_LIBWMF */ - - if (format == 0) - { - printf ("no\n"); - return; - } - -#ifdef HAVE_LIBWMF - if (strcmp (format,"wmf") == 0) - { - printf ("yes\n"); - return; - } -#ifdef HAVE_LIBWMF_FOREIGN_H - if (strcmp (format,"png") == 0) - { - flags = WMF_OPT_IGNORE_NONFATAL | WMF_OPT_FUNCTION; - api_options.function = wmf_foreign_function; - - err = wmf_api_create (&API, flags, &api_options); - if (err != wmf_E_None) - { - printf ("no\n"); - return; - } - - ddata = WMF_FOREIGN_GetData (API); - - if (ddata->flags & WMF_FOREIGN_SUPPORTS_PNG) - { - printf ("yes\n"); - } - else - { - printf ("no\n"); - } - - wmf_api_destroy (API); - return; - } - if (strcmp (format,"jpg") == 0) - { - flags = WMF_OPT_IGNORE_NONFATAL | WMF_OPT_FUNCTION; - api_options.function = wmf_foreign_function; - - err = wmf_api_create (&API, flags, &api_options); - if (err != wmf_E_None) - { - printf ("no\n"); - return; - } - - ddata = WMF_FOREIGN_GetData (API); - - if (ddata->flags & WMF_FOREIGN_SUPPORTS_JPEG) - { - printf ("yes\n"); - } - else - { - printf ("no\n"); - } - - wmf_api_destroy (API); - return; - } -#endif /* HAVE_LIBWMF_FOREIGN_H */ -#endif /* HAVE_LIBWMF */ - - printf ("no\n"); - return; -} - -static int Convert_WMF = 1; -static int Convert_EMF = 1; -static int Convert_PNG = 1; -static int Convert_JPG = 1; -static int Convert_PICT = 1; - -static void wv_suppress (const char* format) -{ - const char* ptr = format; - - if (format == 0) - { - Convert_WMF = 1; - 
Convert_EMF = 1; - Convert_PNG = 1; - Convert_JPG = 1; - Convert_PICT = 1; - - return; - } - - while (*ptr) - { - if (strncmp (ptr,"wmf,",4) == 0) - { - Convert_WMF = 0; - ptr += 4; - continue; - } - if (strncmp (ptr,"emf,",4) == 0) - { - Convert_EMF = 0; - ptr += 4; - continue; - } - if (strncmp (ptr,"png,",4) == 0) - { - Convert_PNG = 0; - ptr += 4; - continue; - } - if (strncmp (ptr,"jpg,",4) == 0) - { - Convert_JPG = 0; - ptr += 4; - continue; - } - if (strncmp (ptr,"pict,",5) == 0) - { - Convert_PICT = 0; - ptr += 5; - continue; - } - - if (strcmp (ptr,"wmf") == 0) - { - Convert_WMF = 0; - break; - } - if (strcmp (ptr,"emf") == 0) - { - Convert_EMF = 0; - break; - } - if (strcmp (ptr,"png") == 0) - { - Convert_PNG = 0; - break; - } - if (strcmp (ptr,"jpg") == 0) - { - Convert_JPG = 0; - break; - } - if (strcmp (ptr,"pict") == 0) - { - Convert_PICT = 0; - break; - } - - fprintf (stderr,"format(s) `%s' not recognized!\n",ptr); - break; - } -} - -void -wvPrintGraphics (char *config, int graphicstype, int width, int height, - char *source) -{ - if ((strstr (config, "wvLaTeX.xml") != NULL) - || (strstr (config, "wvCleanLaTeX.xml") != NULL)) - { - if (strlen (source) >= 4) - { - if (Convert_WMF && strcmp (source + strlen (source) - 4, ".wmf") == 0) - wvConvert_WMF_to_EPS (width, height, &source); - else if (Convert_PNG && strcmp (source + strlen (source) - 4, ".png") == 0) - wvConvert_PNG_to_EPS (width, height, &source); - else if (Convert_JPG && strcmp (source + strlen (source) - 4, ".jpg") == 0) - wvConvert_JPG_to_EPS (width, height, &source); - } - remove_suffix (source, ".eps"); - remove_suffix (source, ".wmf"); - remove_suffix (source, ".pict"); - remove_suffix (source, ".png"); - remove_suffix (source, ".jpg"); - /* - Output to real file name. 
Conversion to .eps must be done manually for now - */ - printf ("\n\\resizebox{%dpt}{%dpt}\ - {\\includegraphics{%s.eps}}\ - \n% -- %#.2x graphic -- \n", width, height, source, graphicstype); - } - else - { - if (strlen (source) >= 4) - if (strcmp (source + strlen (source) - 4, ".wmf") == 0) - wvConvert_WMF_to_PNG (width, height, &source); - if ((strstr (config, "wvHtml.xml") != NULL) - || (strstr (config, "wvWml.xml") != NULL)) - { - printf ("\"%#.2x", - width, height, graphicstype, name_to_url (source), - xml_slash, xml_slash); - } - else - { - printf ("\"%#.2x", - width, height, graphicstype, source, - xml_slash, xml_slash); - } - } - return; -} - -int -mySpecCharProc (wvParseStruct * ps, U16 eachchar, CHP * achp) -{ - static int message; - static_reinit(message, 0) - PICF picf; - FSPA *fspa; - expand_data *data = (expand_data *) ps->userData; - - switch (eachchar) - { - case 19: - wvError (("field began\n")); - ps->fieldstate++; - ps->fieldmiddle = 0; - fieldCharProc (ps, eachchar, 0, 0x400); /* temp */ - return (0); - break; - case 20: - wvTrace (("field middle\n")); - if (achp->fOle2) - { - wvError ( - ("this field has an associated embedded object of id %x\n", - achp->fcPic_fcObj_lTagObj)); - /*test = wvFindObject(achp->fcPic_fcObj_lTagObj); - if (test) - wvError(("data can be found in object entry named %s\n",test->name)); - */ } - fieldCharProc (ps, eachchar, 0, 0x400); /* temp */ - ps->fieldmiddle = 1; - return (0); - break; - case 21: - wvTrace (("field end\n")); - ps->fieldstate--; - ps->fieldmiddle = 0; - fieldCharProc (ps, eachchar, 0, 0x400); /* temp */ - return (0); - break; - } - - if (ps->fieldstate) - { - if (fieldCharProc (ps, eachchar, 0, 0x400)) - return (0); - } - - switch (eachchar) - { - case 0x05: - /* this should be handled by the COMMENTBEGIN and COMMENTEND events */ - return (0); - break; - case 0x01: - { - wvStream *f; - Blip blip; - char *name; - long p = wvStream_tell (ps->data); - wvError ( - ("picture 0x01 here, at offset %x in 
Data Stream, obj is %d, ole is %d\n", - achp->fcPic_fcObj_lTagObj, achp->fObj, achp->fOle2)); - - if (achp->fOle2) - return (0); - if(!no_graphics) - { - wvStream_goto (ps->data, achp->fcPic_fcObj_lTagObj); - wvGetPICF (wvQuerySupported (&ps->fib, NULL), &picf, ps->data); - f = picf.rgb; - if (wv0x01 (&blip, f, picf.lcb - picf.cbHeader)) - { - wvTrace (("Here\n")); - name = wvHtmlGraphic (ps, &blip); - if (ps->dir) chdir (ps->dir); - wvPrintGraphics (config, 0x01, - (int) wvTwipsToHPixels (picf.dxaGoal), - (int) wvTwipsToVPixels (picf.dyaGoal), - name); - if (ps->dir) chdir (wv_cwd); - wvFree (name); - } - else - wvStrangeNoGraphicData (config, 0x01); - } - - wvStream_goto (ps->data, p); - return (0); - } - case 0x08: - { - Blip blip; - char *name; - if (wvQuerySupported (&ps->fib, NULL) == WORD8) - { - if(!no_graphics) - { - if (ps->nooffspa > 0) - { - fspa = - wvGetFSPAFromCP (ps->currentcp, ps->fspa, - ps->fspapos, ps->nooffspa); - - if (!fspa) - { - wvError (("No fspa! Insanity abounds!\n")); - return 0; - } - - data->props = fspa; - if (wv0x08 (&blip, fspa->spid, ps)) - { - wvTrace (("Here\n")); - name = wvHtmlGraphic (ps, &blip); - if (ps->dir) chdir (ps->dir); - wvPrintGraphics (config, 0x08, - (int) - wvTwipsToHPixels (fspa->xaRight - - - fspa-> - xaLeft), - (int) wvTwipsToVPixels (fspa-> - yaBottom - - - fspa-> - yaTop), - name); - if (ps->dir) chdir (wv_cwd); - wvFree (name); - } - else - wvStrangeNoGraphicData (config, 0x08); - } - else - { - wvError (("nooffspa was <=0! 
Ignoring.\n")); - } - } - } - else - { - FDOA *fdoa; - wvError ( - ("pre word8 0x08 graphic, unsupported at the moment\n")); - fdoa = - wvGetFDOAFromCP (ps->currentcp, ps->fdoa, ps->fdoapos, - ps->nooffdoa); - data->props = fdoa; - } - - - - - -#if 0 - if ((fspa) && (data->sd != NULL) - && (data->sd->elements[TT_PICTURE].str) - && (data->sd->elements[TT_PICTURE].str[0] != NULL)) - { - wvExpand (data, data->sd->elements[TT_PICTURE].str[0], - strlen (data->sd->elements[TT_PICTURE].str[0])); - if (data->retstring) - { - wvTrace ( - ("picture string is now %s", - data->retstring)); - printf ("%s", data->retstring); - wvFree (data->retstring); - } - } -#endif - return (0); - } - case 0x28: - { - U16 symbol[6] = { 'S', 'y', 'm', 'b', 'o', 'l' }; - U16 wingdings[9] = - { 'W', 'i', 'n', 'g', 'd', 'i', 'n', 'g', 's' }; - U16 mtextra[8] = - { 'M', 'T', ' ', 'E', 'x', 't', 'r', 'a' }; - - wvTrace ( - ("no of strings %d %d\n", ps->fonts.nostrings, - achp->ftcSym)); - if (0 == memcmp (symbol, ps->fonts.ffn[achp->ftcSym].xszFfn, 12)) - { - if ((!message) && (strcasecmp ("UTF-8", charset))) - { - wvWarning - ("Symbol font detected (too late sorry!), rerun wvHtml with option --charset utf-8\n\ -option to support correct symbol font conversion to a viewable format.\n"); - message++; - } - wvTrace ( - ("symbol char %d %x %c, using font %d %s\n", - achp->xchSym, achp->xchSym, achp->xchSym, - achp->ftcSym, - wvWideStrToMB (ps->fonts.ffn[achp->ftcSym]. 
- xszFfn))); - wvTrace ( - ("symbol char ends up as a unicode %x\n", - wvConvertSymbolToUnicode (achp->xchSym - 61440))); - wvOutputFromUnicode (wvConvertSymbolToUnicode - (achp->xchSym - 61440), charset); - return (0); - } - else if (0 == - memcmp (mtextra, ps->fonts.ffn[achp->ftcSym].xszFfn, - 16)) - { - if ((!message) && (strcasecmp ("UTF-8", charset))) - { - wvWarning - ("MT Extra font detected (too late sorry!), rerun wvHtml with option --charset utf-8\n\ -option to support correct symbol font conversion to a viewable format.\n"); - message++; - } - wvTrace ( - ("Symbol char %d %x %c, using font %d %s\n", - achp->xchSym, achp->xchSym, achp->xchSym, - achp->ftcSym, - wvWideStrToMB (ps->fonts.ffn[achp->ftcSym]. - xszFfn))); - wvTrace ( - ("symbol char ends up as a unicode %x\n", - wvConvertMTExtraToUnicode (achp->xchSym - 61440))); - wvOutputFromUnicode (wvConvertMTExtraToUnicode - (achp->xchSym - 61440), charset); - return (0); - } - else if (0 == - memcmp (wingdings, ps->fonts.ffn[achp->ftcSym].xszFfn, - 18)) - { - if (!message) - { - wvError ( - ("I have yet to do a wingdings to unicode mapping table, if you know of one tell me\n")); - message++; - } - } - else - { - if (!message) - { - char *fontname = - wvWideStrToMB (ps->fonts.ffn[achp->ftcSym]. 
- xszFfn); - wvError ( - ("Special font %s, i need a mapping table to unicode for this\n", - fontname)); - wvFree (fontname); - printf ("*"); - } - return (0); - } - } - default: - return (0); - } - - - - return (0); -} - - -int -myCharProc (wvParseStruct * ps, U16 eachchar, U8 chartype, U16 lid) -{ - switch (eachchar) - { - case 19: - wvTrace (("field began\n")); - ps->fieldstate++; - ps->fieldmiddle = 0; - fieldCharProc (ps, eachchar, chartype, lid); /* temp */ - return (0); - break; - case 20: - wvTrace (("field middle\n")); - fieldCharProc (ps, eachchar, chartype, lid); - ps->fieldmiddle = 1; - return (0); - break; - case 21: - wvTrace (("field began\n")); - ps->fieldmiddle = 0; - ps->fieldstate--; - fieldCharProc (ps, eachchar, chartype, lid); /* temp */ - return (0); - break; - case 0x08: - wvError ( - ("hmm did we loose the fSpec flag ?, this is possibly a bug\n")); - break; - } - - if (ps->fieldstate) - { - if (fieldCharProc (ps, eachchar, chartype, lid)) - return (0); - } - - wvTrace ( - ("charset is %s, lid is %x, type is %d, char is %x\n", charset, - lid, chartype, eachchar)); - - if ((chartype) && (wvQuerySupported (&ps->fib, NULL) == WORD8)) - wvTrace (("lid is %x\n", lid)); - - if (charset != NULL) - wvOutputHtmlChar (eachchar, chartype, charset, lid); - else - wvOutputHtmlChar (eachchar, chartype, wvAutoCharset (ps), lid); - return (0); -} - -int -wvOpenConfig (state_data *myhandle,char *config) -{ - FILE *tmp; - int i = 0; - if (config == NULL) - config = "wvHtml.xml"; - else - i = 1; - tmp = fopen (config, "rb"); - - if(tmp == NULL) - { - const char *wv_data_dir = getenv("WVDATADIR"); - if (NULL == wv_data_dir) { - wvError (("Env var WVDATADIR unset!")); - return 0; - } - static char * buf = NULL; - if (NULL != buf) { - free(buf); - } - buf = strdup_and_append_twice(wv_data_dir, "/", config); - config = buf; - tmp = fopen(config, "rb"); - } - - if (tmp == NULL) - { - const char *wv_data_dir = getenv("WVDATADIR"); - if (NULL == wv_data_dir) { - 
wvError (("Env var WVDATADIR unset!")); - return 0; - } - char * html_config = strdup_and_append_twice(wv_data_dir, "/", "wvHtml.xml"); - if (i) - wvError (("Attempt to open %s failed, using %s\n", config, html_config)); - config = html_config; - tmp = fopen (config, "rb"); - } - myhandle->path = config; - myhandle->fp = tmp; - return (tmp == NULL ? 0 : 1); -} - -char * figure_name (wvParseStruct * ps) -{ - static int number; - static_reinit(number, 0) - static char * b_name = 0; - static_reinit(b_name, NULL) - char * f_name = 0; - char buffer[10]; - - if (b_name == 0) - { - if (wv_arg_basename) - { - b_name = strdup (wv_arg_basename); -#ifdef WV_REMOVE_SUFFIX - if (b_name) /* remove any suffix */ - { - char * dot = 0; - char * ptr = b_name; - while (*ptr) - { - if (*ptr == '.') dot = ptr; - ptr++; - } - if (dot) *dot = 0; - } -#endif /* WV_REMOVE_SUFFIX */ - } - else - { - b_name = strdup (base_name (ps->filename)); - if (b_name) /* remove '.doc' suffix; case insensitive */ - { - if (strlen (b_name) >= 4) - { - char * dot = b_name + strlen (b_name) - 4; - if (strcasecmp (dot,".doc") == 0) *dot = 0; - } - } - } - } - - if (b_name == 0) - { - fprintf (stderr,"error: unable to create basename!"); - exit (1); - } - - f_name = strdup (b_name); - if (f_name) - { - sprintf (buffer, "%d", number++); - wvAppendStr (&f_name, buffer); - } - else - { - fprintf (stderr,"error: unable to create filename!"); - exit (1); - } - - return (f_name); -} - -char * name_to_url (char * name) -{ - static char * url = 0; - static_reinit(url, NULL) - static long max = 0; - static_reinit(max, 0) - char * ptr = 0; - long count = 0; - - ptr = name; - while (*ptr) - { - switch (*ptr) - { - case ' ': - count += 3; - break; - default: - count++; - break; - } - ptr++; - } - count++; - - if (count > max) - { - char * more = 0; - if (url == 0) - { - more = malloc (count); - } - else - { - more = realloc (url,count); - } - if (more) - { - url = more; - max = count; - } - } - - if (url) - { - count = 
0; - ptr = name; - while (*ptr && (count < max)) - { - switch (*ptr) - { - case ' ': - url[count++] = '%'; - if (count < max) url[count++] = '2'; - if (count < max) url[count++] = '0'; - break; - default: - url[count++] = *ptr; - break; - } - ptr++; - } - url[max-1] = 0; - } - else - { - wvError (("failed to convert name to URL\n")); - return (name); - } - - return (url); -} From 0322ffbaed9c95545155569272a06ae553e1fb30 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Mon, 26 Aug 2024 23:10:08 +0300 Subject: [PATCH 50/97] Format wvWare_wrapper.cpp --- src/odr/internal/html/wvWare_wrapper.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/odr/internal/html/wvWare_wrapper.cpp b/src/odr/internal/html/wvWare_wrapper.cpp index 29e847f7..0ea76ba4 100644 --- a/src/odr/internal/html/wvWare_wrapper.cpp +++ b/src/odr/internal/html/wvWare_wrapper.cpp @@ -28,7 +28,8 @@ Html wvWare_wrapper(const std::string &input_path, if (password.has_value()) { password_value = password.value(); } - int retVal = wvHtml_convert(input_file_path, output_dir, password_value.c_str()); + int retVal = + wvHtml_convert(input_file_path, output_dir, password_value.c_str()); free(output_dir); free(input_file_path); fclose(g_htmlOutputFileHandle); From cde62f8965c57e92aaea5054908f5010423fde5f Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Tue, 27 Aug 2024 09:46:13 +0300 Subject: [PATCH 51/97] Unexpose pdf2htmlEX and wvWare in odr::OpenDocumentReader class --- src/odr/open_document_reader.cpp | 28 +--------------------------- src/odr/open_document_reader.hpp | 28 ---------------------------- 2 files changed, 1 insertion(+), 55 deletions(-) diff --git a/src/odr/open_document_reader.cpp b/src/odr/open_document_reader.cpp index 73c9a235..04d8d865 100644 --- a/src/odr/open_document_reader.cpp +++ b/src/odr/open_document_reader.cpp @@ -7,17 +7,11 @@ #include #include #include +#include #include #include -#if defined(WITH_PDF2HTMLEX) -#include -#endif -#if 
defined(WITH_WVWARE) -#include -#endif - namespace odr { std::string OpenDocumentReader::version() noexcept { @@ -251,26 +245,6 @@ Html OpenDocumentReader::html(const PdfFile &pdf_file, return html::translate(pdf_file, output_path, config); } -#if defined(WITH_PDF2HTMLEX) -Html OpenDocumentReader::pdf2htmlEX(const std::string &input_path, - const std::string &output_path, - const HtmlConfig &config, - std::optional &password) { - return internal::html::pdf2htmlEX_wrapper(input_path, output_path, config, - password); -} -#endif - -#if defined(WITH_WVWARE) -Html OpenDocumentReader::wvHtml(const std::string &input_path, - const std::string &output_path, - const HtmlConfig &config, - std::optional &password) { - return internal::html::wvWare_wrapper(input_path, output_path, config, - password); -} -#endif - void OpenDocumentReader::edit(const Document &document, const char *diff) { html::edit(document, diff); } diff --git a/src/odr/open_document_reader.hpp b/src/odr/open_document_reader.hpp index 3849389c..e73c993e 100644 --- a/src/odr/open_document_reader.hpp +++ b/src/odr/open_document_reader.hpp @@ -2,8 +2,6 @@ #define ODR_OPEN_DOCUMENT_READER_HPP #include -#include -#include #include #include @@ -141,32 +139,6 @@ class OpenDocumentReader final { const std::string &output_path, const HtmlConfig &config); -#if defined(WITH_PDF2HTMLEX) - /// @brief Translates a PDF file to HTML using pdf2htmlEX. - /// - /// @param input_path Path to the file to translate. - /// @param output_path Path to save the HTML output. - /// @param config Configuration for the HTML output. - /// @return HTML output. - [[nodiscard]] static Html pdf2htmlEX(const std::string &input_path, - const std::string &output_path, - const HtmlConfig &config, - std::optional &password); -#endif - -#if defined(WITH_WVWARE) - /// @brief Translates a doc file to HTML using wvWare. - /// - /// @param input_path Path to the file to translate. - /// @param output_path Path to save the HTML output. 
- /// @param config Configuration for the HTML output. - /// @return HTML output. - [[nodiscard]] static Html wvHtml(const std::string &input_path, - const std::string &output_path, - const HtmlConfig &config, - std::optional &password); -#endif - /// @brief Edit a document. /// @param document The document. /// @param diff The diff. From a010d3d7f89bd8007edfb7b2c59850dbe7da1597 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Tue, 27 Aug 2024 09:48:03 +0300 Subject: [PATCH 52/97] Add std::optional header include in wvWare_wrapper header --- src/odr/internal/html/wvWare_wrapper.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/odr/internal/html/wvWare_wrapper.hpp b/src/odr/internal/html/wvWare_wrapper.hpp index 0d9e4e06..e7000901 100644 --- a/src/odr/internal/html/wvWare_wrapper.hpp +++ b/src/odr/internal/html/wvWare_wrapper.hpp @@ -1,6 +1,7 @@ #ifndef ODR_INTERNAL_WVWARE_WRAPPER_HPP #define ODR_INTERNAL_WVWARE_WRAPPER_HPP +#include #include namespace odr { From 3343f18179d80de8be786f1bc62700d0c28a7ec1 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Tue, 27 Aug 2024 09:49:47 +0300 Subject: [PATCH 53/97] Remove leftover headers from wvWare_wrapper --- src/odr/internal/html/wvWare_wrapper.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/odr/internal/html/wvWare_wrapper.cpp b/src/odr/internal/html/wvWare_wrapper.cpp index 0ea76ba4..f5870b29 100644 --- a/src/odr/internal/html/wvWare_wrapper.cpp +++ b/src/odr/internal/html/wvWare_wrapper.cpp @@ -6,12 +6,6 @@ #include #include -extern "C" { -int convert(char *inputFile, char *outputDir, const char *password); -extern int no_graphics; -extern int documentId; -} - namespace odr::internal::html { Html wvWare_wrapper(const std::string &input_path, From 7f31990143c68b9635310fa5e00c2294512aed39 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Wed, 28 Aug 2024 21:59:25 +0300 Subject: [PATCH 54/97] Add pdf2htmlEX wrapper tests. 
Will probably fail, because of missing env vars --- test/src/pdf2htmlEX_wrapper_test.cpp | 62 ++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 test/src/pdf2htmlEX_wrapper_test.cpp diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp new file mode 100644 index 00000000..14ab038b --- /dev/null +++ b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -0,0 +1,62 @@ +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +using namespace odr; +using namespace odr::test; +using namespace odr::internal; +using namespace odr::test; +namespace fs = std::filesystem; + +using pdf2htmlEXWrapperTests = ::testing::TestWithParam; + +TEST_P(pdf2htmlEXWrapperTests, html) { + const std::string test_file_path = GetParam(); + const TestFile test_file = TestData::test_file(test_file_path); + + const std::string test_repo = *common::Path(test_file_path).begin(); + const std::string output_path_prefix = + common::Path("output").join(test_repo).join("output").join("pdf2htmlEX").string(); + const std::string output_path = + common::Path(output_path_prefix) + .join(common::Path(test_file_path).rebase(test_repo)) + .string(); + + std::cout << test_file.path << " to " << output_path << std::endl; + + if (!util::string::ends_with(test_file.path, ".pdf") && test_file.type != FileType::portable_document_format) { + GTEST_SKIP(); + } + + fs::create_directories(output_path); + HtmlConfig config; + std::optional password; + Html html = odr::internal::html::pdf2htmlEX_wrapper(test_file.path, output_path, config, password); + + for (const HtmlPage &html_page : html.pages()) { + EXPECT_TRUE(fs::is_regular_file(html_page.path)); + EXPECT_LT(0, fs::file_size(html_page.path)); + } +} + +INSTANTIATE_TEST_SUITE_P(all_test_files, pdf2htmlEXWrapperTests, + testing::ValuesIn(TestData::test_file_paths()), + [](const ::testing::TestParamInfo &info) { + std::string path = info.param; + 
internal::util::string::replace_all(path, "/", "_"); + internal::util::string::replace_all(path, "-", "_"); + internal::util::string::replace_all(path, "+", "_"); + internal::util::string::replace_all(path, ".", "_"); + internal::util::string::replace_all(path, " ", "_"); + internal::util::string::replace_all(path, "$", ""); + return path; + }); From 8466d3940b2b2f2c37e3c43b6bc55aa0f6c07c3e Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Wed, 28 Aug 2024 22:33:32 +0300 Subject: [PATCH 55/97] [2/2] Add pdf2htmlEX wrapper tests --- test/CMakeLists.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5142fc9c..084490e8 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -6,6 +6,12 @@ set(ODR_TEST_DATA_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/data") configure_file("src/test_constants.cpp.in" "src/test_constants.cpp") enable_testing() + +if(WITH_PDF2HTMLEX) + LIST(APPEND ODR_TEST_SOURCE_FILES + "src/pdf2htmlEX_wrapper_test.cpp" + ) +endif(WITH_PDF2HTMLEX) add_executable(odr_test "src/test_util.cpp" "${CMAKE_CURRENT_BINARY_DIR}/src/test_constants.cpp" @@ -43,6 +49,8 @@ add_executable(odr_test "src/internal/zip/miniz_test.cpp" "src/internal/zip/zip_archive_test.cpp" + + ${ODR_TEST_SOURCE_FILES} ) target_include_directories(odr_test PRIVATE @@ -62,4 +70,8 @@ target_link_libraries(odr_test odr ) + +if(WITH_PDF2HTMLEX) + target_link_libraries(odr_test PRIVATE pdf2htmlex::pdf2htmlex) +endif(WITH_PDF2HTMLEX) gtest_add_tests(TARGET odr_test) From fa7803a5ea70f7087402720ec5b49eed9a2fb8bb Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Wed, 28 Aug 2024 22:35:39 +0300 Subject: [PATCH 56/97] Format pdf2htmlEX_wrapper_test --- test/src/pdf2htmlEX_wrapper_test.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp index 14ab038b..ae7c96ea 100644 --- a/test/src/pdf2htmlEX_wrapper_test.cpp +++ 
b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -3,12 +3,12 @@ #include #include -#include #include +#include -#include #include #include +#include #include using namespace odr; @@ -24,8 +24,11 @@ TEST_P(pdf2htmlEXWrapperTests, html) { const TestFile test_file = TestData::test_file(test_file_path); const std::string test_repo = *common::Path(test_file_path).begin(); - const std::string output_path_prefix = - common::Path("output").join(test_repo).join("output").join("pdf2htmlEX").string(); + const std::string output_path_prefix = common::Path("output") + .join(test_repo) + .join("output") + .join("pdf2htmlEX") + .string(); const std::string output_path = common::Path(output_path_prefix) .join(common::Path(test_file_path).rebase(test_repo)) @@ -33,14 +36,16 @@ TEST_P(pdf2htmlEXWrapperTests, html) { std::cout << test_file.path << " to " << output_path << std::endl; - if (!util::string::ends_with(test_file.path, ".pdf") && test_file.type != FileType::portable_document_format) { + if (!util::string::ends_with(test_file.path, ".pdf") && + test_file.type != FileType::portable_document_format) { GTEST_SKIP(); } fs::create_directories(output_path); HtmlConfig config; std::optional password; - Html html = odr::internal::html::pdf2htmlEX_wrapper(test_file.path, output_path, config, password); + Html html = odr::internal::html::pdf2htmlEX_wrapper( + test_file.path, output_path, config, password); for (const HtmlPage &html_page : html.pages()) { EXPECT_TRUE(fs::is_regular_file(html_page.path)); From 04fd03db44254dc69d7b76afc231cbc5651f363b Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 29 Aug 2024 00:58:09 +0300 Subject: [PATCH 57/97] Fix env vars in pdf2htmlEX and wvWare --- conanfile.py | 14 ++++++++++++ src/odr/internal/html/pdf2htmlEX_wrapper.cpp | 24 ++++++++++++++++++++ src/odr/internal/html/wvWare_wrapper.cpp | 18 +++++++++++---- src/odr/internal/project_info.hpp.in | 4 ++++ 4 files changed, 56 insertions(+), 4 deletions(-) diff --git a/conanfile.py 
b/conanfile.py index e4675db8..3a855af0 100644 --- a/conanfile.py +++ b/conanfile.py @@ -3,6 +3,8 @@ from conan import ConanFile from conan.tools.build import check_min_cppstd from conan.tools.cmake import CMakeToolchain, CMakeDeps, CMake +from conan.tools.env import Environment +from conan.tools.env.environment import EnvVars from conan.tools.files import copy @@ -68,6 +70,18 @@ def generate(self): tc.variables["ODR_TEST"] = False tc.variables["WITH_PDF2HTMLEX"] = self.options.get_safe("with_pdf2htmlEX", False) tc.variables["WITH_WVWARE"] = self.options.get_safe("with_wvWare", False) + + # Get runenv info, exported by package_info() of dependencies + # We need to obtain PDF2HTMLEX_DATA_DIR, POPPLER_DATA_DIR, FONTCONFIG_PATH and WVDATADIR + runenv_info = Environment() + deps = self.dependencies.host.topological_sort + deps = [dep for dep in reversed(deps.values())] + for dep in deps: + runenv_info.compose_env(dep.runenv_info) + envvars = runenv_info.vars(self) + for v in ["PDF2HTMLEX_DATA_DIR", "POPPLER_DATA_DIR", "FONTCONFIG_PATH", "WVDATADIR"]: + tc.variables[v] = envvars.get(v) + tc.generate() deps = CMakeDeps(self) diff --git a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp index 012dbe72..c85a9052 100644 --- a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp +++ b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp @@ -8,12 +8,36 @@ #include +#include + namespace odr::internal { +static void ensure_env_vars() { + static const char *pdf2htmlEX_data_dir = getenv("PDF2HTMLEX_DATA_DIR"); + if (nullptr == pdf2htmlEX_data_dir) { + pdf2htmlEX_data_dir = PDF2HTMLEX_DATA_DIR; + setenv("PDF2HTMLEX_DATA_DIR", pdf2htmlEX_data_dir, 0); + } + + static const char *poppler_data_dir = getenv("POPPLER_DATA_DIR"); + if (nullptr == poppler_data_dir) { + poppler_data_dir = POPPLER_DATA_DIR; + setenv("POPPLER_DATA_DIR", poppler_data_dir, 0); + } + + static const char *fontconfig_path = getenv("FONTCONFIG_PATH"); + if (nullptr == fontconfig_path) { 
+ fontconfig_path = FONTCONFIG_PATH; + setenv("FONTCONFIG_PATH", fontconfig_path, 0); + } +} + Html html::pdf2htmlEX_wrapper(const std::string &input_path, const std::string &output_path, const HtmlConfig &config, std::optional &password) { + ensure_env_vars(); + pdf2htmlEX::pdf2htmlEX pdf2htmlEX; pdf2htmlEX.setInputFilename(input_path); diff --git a/src/odr/internal/html/wvWare_wrapper.cpp b/src/odr/internal/html/wvWare_wrapper.cpp index f5870b29..6093fd68 100644 --- a/src/odr/internal/html/wvWare_wrapper.cpp +++ b/src/odr/internal/html/wvWare_wrapper.cpp @@ -3,14 +3,25 @@ #include #include #include +#include #include #include namespace odr::internal::html { +static void ensure_env_vars() { + static const char *wvdatadir = getenv("WVDATADIR"); + if (nullptr == wvdatadir) { + wvdatadir = WVDATADIR; + setenv("WVDATADIR", wvdatadir, 0); + } +} + Html wvWare_wrapper(const std::string &input_path, const std::string &output_path, const HtmlConfig &config, std::optional &password) { + ensure_env_vars(); + auto output_file_path = output_path + "/document.html"; char *input_file_path = strdup(input_path.c_str()); @@ -18,12 +29,11 @@ Html wvWare_wrapper(const std::string &input_path, g_htmlOutputFileHandle = fopen(output_file_path.c_str(), "w"); - std::string password_value; + std::string pw; if (password.has_value()) { - password_value = password.value(); + pw = password.value(); } - int retVal = - wvHtml_convert(input_file_path, output_dir, password_value.c_str()); + int retVal = wvHtml_convert(input_file_path, output_dir, pw.c_str()); free(output_dir); free(input_file_path); fclose(g_htmlOutputFileHandle); diff --git a/src/odr/internal/project_info.hpp.in b/src/odr/internal/project_info.hpp.in index d93c9d88..0af208e4 100644 --- a/src/odr/internal/project_info.hpp.in +++ b/src/odr/internal/project_info.hpp.in @@ -6,6 +6,10 @@ const char *version() noexcept; } // namespace odr::internal::project_info #cmakedefine WITH_PDF2HTMLEX 1 +#cmakedefine PDF2HTMLEX_DATA_DIR 
"@PDF2HTMLEX_DATA_DIR@" +#cmakedefine POPPLER_DATA_DIR "@POPPLER_DATA_DIR@" +#cmakedefine FONTCONFIG_PATH "@FONTCONFIG_PATH@" #cmakedefine WITH_WVWARE 1 +#cmakedefine WVDATADIR "@WVDATADIR@" #endif // ODR_INTERNAL_PROJECT_INFO_HPP From 7a3e1e9ec9feb471cac2addf11cb9dcb48d36eab Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 29 Aug 2024 01:01:20 +0300 Subject: [PATCH 58/97] add wvWare wrapper tests --- test/CMakeLists.txt | 9 +++++ test/src/wvWare_wrapper_test.cpp | 67 ++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 test/src/wvWare_wrapper_test.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 084490e8..4b4fddfe 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -12,6 +12,11 @@ if(WITH_PDF2HTMLEX) "src/pdf2htmlEX_wrapper_test.cpp" ) endif(WITH_PDF2HTMLEX) +if(WITH_WVWARE) + LIST(APPEND ODR_TEST_SOURCE_FILES + "src/wvWare_wrapper_test.cpp" + ) +endif(WITH_WVWARE) add_executable(odr_test "src/test_util.cpp" "${CMAKE_CURRENT_BINARY_DIR}/src/test_constants.cpp" @@ -74,4 +79,8 @@ target_link_libraries(odr_test if(WITH_PDF2HTMLEX) target_link_libraries(odr_test PRIVATE pdf2htmlex::pdf2htmlex) endif(WITH_PDF2HTMLEX) +if(WITH_WVWARE) + target_link_libraries(odr_test PRIVATE wvware::wvware) +endif(WITH_WVWARE) + gtest_add_tests(TARGET odr_test) diff --git a/test/src/wvWare_wrapper_test.cpp b/test/src/wvWare_wrapper_test.cpp new file mode 100644 index 00000000..292477a9 --- /dev/null +++ b/test/src/wvWare_wrapper_test.cpp @@ -0,0 +1,67 @@ +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +using namespace odr; +using namespace odr::test; +using namespace odr::internal; +using namespace odr::test; +namespace fs = std::filesystem; + +using wvWareWrapperTests = ::testing::TestWithParam; + +TEST_P(wvWareWrapperTests, html) { + const std::string test_file_path = GetParam(); + const TestFile test_file = TestData::test_file(test_file_path); + + const 
std::string test_repo = *common::Path(test_file_path).begin(); + const std::string output_path_prefix = common::Path("output") + .join(test_repo) + .join("output") + .join("wvWare") + .string(); + const std::string output_path = + common::Path(output_path_prefix) + .join(common::Path(test_file_path).rebase(test_repo)) + .string(); + + std::cout << test_file.path << " to " << output_path << std::endl; + + if (!util::string::ends_with(test_file.path, ".doc") && + test_file.type != FileType::legacy_word_document) { + GTEST_SKIP(); + } + + fs::create_directories(output_path); + HtmlConfig config; + std::optional password; + Html html = odr::internal::html::wvWare_wrapper( + test_file.path, output_path, config, password); + + for (const HtmlPage &html_page : html.pages()) { + EXPECT_TRUE(fs::is_regular_file(html_page.path)); + EXPECT_LT(0, fs::file_size(html_page.path)); + } +} + +INSTANTIATE_TEST_SUITE_P(all_test_files, wvWareWrapperTests, + testing::ValuesIn(TestData::test_file_paths()), + [](const ::testing::TestParamInfo &info) { + std::string path = info.param; + internal::util::string::replace_all(path, "/", "_"); + internal::util::string::replace_all(path, "-", "_"); + internal::util::string::replace_all(path, "+", "_"); + internal::util::string::replace_all(path, ".", "_"); + internal::util::string::replace_all(path, " ", "_"); + internal::util::string::replace_all(path, "$", ""); + return path; + }); From ddbb7b43b7e7572bc0dcc86a304061e31b4ae835 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 29 Aug 2024 01:02:22 +0300 Subject: [PATCH 59/97] Formatting --- test/src/wvWare_wrapper_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/src/wvWare_wrapper_test.cpp b/test/src/wvWare_wrapper_test.cpp index 292477a9..90d8bf40 100644 --- a/test/src/wvWare_wrapper_test.cpp +++ b/test/src/wvWare_wrapper_test.cpp @@ -44,8 +44,8 @@ TEST_P(wvWareWrapperTests, html) { fs::create_directories(output_path); HtmlConfig config; 
std::optional password; - Html html = odr::internal::html::wvWare_wrapper( - test_file.path, output_path, config, password); + Html html = odr::internal::html::wvWare_wrapper(test_file.path, output_path, + config, password); for (const HtmlPage &html_page : html.pages()) { EXPECT_TRUE(fs::is_regular_file(html_page.path)); From 714a3b137184999204c1e6d9f399d10d3c01ad27 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 29 Aug 2024 01:28:07 +0300 Subject: [PATCH 60/97] Don't build with pdf2htmlEX on Macos, no idea how it wasn't build breaking before --- conanfile.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/conanfile.py b/conanfile.py index 3a855af0..ae5c7f71 100644 --- a/conanfile.py +++ b/conanfile.py @@ -54,11 +54,9 @@ def validate_build(self): def config_options(self): if self.settings.os == "Windows": del self.options.fPIC - self.options.with_pdf2htmlEX = False - self.options.with_wvWare = False - else: - self.options.with_pdf2htmlEX = True - self.options.with_wvWare = True + + self.options.with_pdf2htmlEX = self.settings.os not in ["Windows", "Macos"] + self.options.with_wvWare = self.settings.os != "Windows" def configure(self): if self.options.shared: From c30a155622a545158ba665b2aa33da9a8b556f26 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 29 Aug 2024 02:22:35 +0300 Subject: [PATCH 61/97] Supply password to password protected test in pdf2htmlEX wrapper test --- test/src/pdf2htmlEX_wrapper_test.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp index ae7c96ea..3cddfce0 100644 --- a/test/src/pdf2htmlEX_wrapper_test.cpp +++ b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -44,6 +44,11 @@ TEST_P(pdf2htmlEXWrapperTests, html) { fs::create_directories(output_path); HtmlConfig config; std::optional password; + + if (test_file.password_encrypted) { + password = test_file.password; + } + Html html = 
odr::internal::html::pdf2htmlEX_wrapper( test_file.path, output_path, config, password); From 961ccbbe77e53b1d7d03f986af5c676522ea7458 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 29 Aug 2024 02:29:00 +0300 Subject: [PATCH 62/97] Skip password protected files in wvWare wrapper test --- test/src/wvWare_wrapper_test.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/src/wvWare_wrapper_test.cpp b/test/src/wvWare_wrapper_test.cpp index 90d8bf40..ae6f9899 100644 --- a/test/src/wvWare_wrapper_test.cpp +++ b/test/src/wvWare_wrapper_test.cpp @@ -41,6 +41,11 @@ TEST_P(wvWareWrapperTests, html) { GTEST_SKIP(); } + // Password protected files are problematic on wvWare + if (test_file.password_encrypted) { + GTEST_SKIP(); + } + fs::create_directories(output_path); HtmlConfig config; std::optional password; From 80b356af0f05bcf49ae40be5e6566101b7f4ffac Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 29 Aug 2024 02:40:30 +0300 Subject: [PATCH 63/97] Disable wvWare on Macos --- conanfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conanfile.py b/conanfile.py index ae5c7f71..067af3a4 100644 --- a/conanfile.py +++ b/conanfile.py @@ -56,7 +56,7 @@ def config_options(self): del self.options.fPIC self.options.with_pdf2htmlEX = self.settings.os not in ["Windows", "Macos"] - self.options.with_wvWare = self.settings.os != "Windows" + self.options.with_wvWare = self.settings.os not in ["Windows", "Macos"] def configure(self): if self.options.shared: From 1d0fcd41d75c654d418b854b8d101462fa7cdb99 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 1 Sep 2024 01:12:28 +0300 Subject: [PATCH 64/97] Fix password for encrypted_fontfile3_opentype.pdf --- test/src/pdf2htmlEX_wrapper_test.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp index 3cddfce0..b3d6419c 100644 --- a/test/src/pdf2htmlEX_wrapper_test.cpp +++ 
b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -48,6 +48,10 @@ TEST_P(pdf2htmlEXWrapperTests, html) { if (test_file.password_encrypted) { password = test_file.password; } + // @TODO: why does test_file.password_encrypted == false for this file?? + else if (test_file.path.ends_with("encrypted_fontfile3_opentype.pdf")) { + password = "sample-user-password"; + } Html html = odr::internal::html::pdf2htmlEX_wrapper( test_file.path, output_path, config, password); From ed6122a5b97e98f1eb51389b5dd1e0e2c32dc821 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 1 Sep 2024 03:17:28 +0300 Subject: [PATCH 65/97] Try test on ubuntu-22.04/gcc-13 --- .github/workflows/build_test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index 671cfbbb..cd5a4a0d 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -20,6 +20,7 @@ jobs: include: - { os: ubuntu-24.04, compiler: clang-18 } - { os: ubuntu-24.04, compiler: gcc-14 } + - { os: ubuntu-22.04, compiler: gcc-13 } - { os: macos-13, compiler: clang-14 } - { os: macos-14, compiler: armv8-clang-14 } - { os: windows-2022, compiler: msvc-1940 } @@ -172,6 +173,7 @@ jobs: include: - { os: ubuntu-24.04, compiler: clang-18 } - { os: ubuntu-24.04, compiler: gcc-14 } + - { os: ubuntu-22.04, compiler: gcc-13 } - { os: macos-13, compiler: clang-14 } - { os: macos-14, compiler: armv8-clang-14 } # Windows test disabled because: From b1015058afe1e72defe630571cd4898f5eea94ab Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 1 Sep 2024 03:19:47 +0300 Subject: [PATCH 66/97] [2/2]Try test on ubuntu-22.04/gcc-13 --- .../config/ubuntu-22.04-gcc-13/conan/profiles/default | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/config/ubuntu-22.04-gcc-13/conan/profiles/default diff --git a/.github/config/ubuntu-22.04-gcc-13/conan/profiles/default b/.github/config/ubuntu-22.04-gcc-13/conan/profiles/default new file 
mode 100644 index 00000000..88351ab7 --- /dev/null +++ b/.github/config/ubuntu-22.04-gcc-13/conan/profiles/default @@ -0,0 +1,11 @@ +[settings] +arch=x86_64 +build_type=Release +compiler=gcc +compiler.version=13 +compiler.cppstd=20 +compiler.libcxx=libstdc++11 +os=Linux + +[conf] +tools.build:compiler_executables={'c': 'gcc-13', 'cpp': 'g++-13'} \ No newline at end of file From 9e7df8f09218f8a8cf129edf6b2955f4b436ca5d Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 1 Sep 2024 03:25:46 +0300 Subject: [PATCH 67/97] Try gcc-12 instead of gcc-13 --- .../conan/profiles/default | 4 ++-- .github/workflows/build_test.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) rename .github/config/{ubuntu-22.04-gcc-13 => ubuntu-22.04-gcc-12}/conan/profiles/default (58%) diff --git a/.github/config/ubuntu-22.04-gcc-13/conan/profiles/default b/.github/config/ubuntu-22.04-gcc-12/conan/profiles/default similarity index 58% rename from .github/config/ubuntu-22.04-gcc-13/conan/profiles/default rename to .github/config/ubuntu-22.04-gcc-12/conan/profiles/default index 88351ab7..efe73c3a 100644 --- a/.github/config/ubuntu-22.04-gcc-13/conan/profiles/default +++ b/.github/config/ubuntu-22.04-gcc-12/conan/profiles/default @@ -2,10 +2,10 @@ arch=x86_64 build_type=Release compiler=gcc -compiler.version=13 +compiler.version=12 compiler.cppstd=20 compiler.libcxx=libstdc++11 os=Linux [conf] -tools.build:compiler_executables={'c': 'gcc-13', 'cpp': 'g++-13'} \ No newline at end of file +tools.build:compiler_executables={'c': 'gcc-12', 'cpp': 'g++-12'} \ No newline at end of file diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index cd5a4a0d..c47a9d31 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -20,7 +20,7 @@ jobs: include: - { os: ubuntu-24.04, compiler: clang-18 } - { os: ubuntu-24.04, compiler: gcc-14 } - - { os: ubuntu-22.04, compiler: gcc-13 } + - { os: ubuntu-22.04, compiler: gcc-12 } - { os: 
macos-13, compiler: clang-14 } - { os: macos-14, compiler: armv8-clang-14 } - { os: windows-2022, compiler: msvc-1940 } @@ -173,7 +173,7 @@ jobs: include: - { os: ubuntu-24.04, compiler: clang-18 } - { os: ubuntu-24.04, compiler: gcc-14 } - - { os: ubuntu-22.04, compiler: gcc-13 } + - { os: ubuntu-22.04, compiler: gcc-12 } - { os: macos-13, compiler: clang-14 } - { os: macos-14, compiler: armv8-clang-14 } # Windows test disabled because: From 1fb92b5075036428954cdf1871a23aac1e053faf Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 1 Sep 2024 03:29:04 +0300 Subject: [PATCH 68/97] Remove gcc-12 --- .../config/ubuntu-22.04-gcc-12/conan/profiles/default | 11 ----------- .github/workflows/build_test.yml | 2 -- 2 files changed, 13 deletions(-) delete mode 100644 .github/config/ubuntu-22.04-gcc-12/conan/profiles/default diff --git a/.github/config/ubuntu-22.04-gcc-12/conan/profiles/default b/.github/config/ubuntu-22.04-gcc-12/conan/profiles/default deleted file mode 100644 index efe73c3a..00000000 --- a/.github/config/ubuntu-22.04-gcc-12/conan/profiles/default +++ /dev/null @@ -1,11 +0,0 @@ -[settings] -arch=x86_64 -build_type=Release -compiler=gcc -compiler.version=12 -compiler.cppstd=20 -compiler.libcxx=libstdc++11 -os=Linux - -[conf] -tools.build:compiler_executables={'c': 'gcc-12', 'cpp': 'g++-12'} \ No newline at end of file diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index c47a9d31..671cfbbb 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -20,7 +20,6 @@ jobs: include: - { os: ubuntu-24.04, compiler: clang-18 } - { os: ubuntu-24.04, compiler: gcc-14 } - - { os: ubuntu-22.04, compiler: gcc-12 } - { os: macos-13, compiler: clang-14 } - { os: macos-14, compiler: armv8-clang-14 } - { os: windows-2022, compiler: msvc-1940 } @@ -173,7 +172,6 @@ jobs: include: - { os: ubuntu-24.04, compiler: clang-18 } - { os: ubuntu-24.04, compiler: gcc-14 } - - { os: ubuntu-22.04, compiler: gcc-12 } - 
{ os: macos-13, compiler: clang-14 } - { os: macos-14, compiler: armv8-clang-14 } # Windows test disabled because: From fd01c3f722f60ad55c479c38572ae4aa8d14edf6 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 1 Sep 2024 04:13:09 +0300 Subject: [PATCH 69/97] Attempt to print stacktrace to figure out what the exception is --- CMakeLists.txt | 2 +- test/src/pdf2htmlEX_wrapper_test.cpp | 26 +++++++++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ce033240..7f0eec36 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR # using clang or gcc set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g -rdynamic") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") # debugging #set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_GLIBCXX_DEBUG") diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp index b3d6419c..c47acaf9 100644 --- a/test/src/pdf2htmlEX_wrapper_test.cpp +++ b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -53,12 +53,28 @@ TEST_P(pdf2htmlEXWrapperTests, html) { password = "sample-user-password"; } - Html html = odr::internal::html::pdf2htmlEX_wrapper( - test_file.path, output_path, config, password); + try { + Html html = odr::internal::html::pdf2htmlEX_wrapper( + test_file.path, output_path, config, password); - for (const HtmlPage &html_page : html.pages()) { - EXPECT_TRUE(fs::is_regular_file(html_page.path)); - EXPECT_LT(0, fs::file_size(html_page.path)); + for (const HtmlPage &html_page : html.pages()) { + EXPECT_TRUE(fs::is_regular_file(html_page.path)); + EXPECT_LT(0, fs::file_size(html_page.path)); + } + } catch (const std::exception & e) { + std::cerr << e.what() << std::endl << std::flush; + + void *array[10]; + int size = backtrace(array, 10); + char 
** symbols = backtrace_symbols(array, size); + for (int i = 0; i < size; i++) { + std::cerr << symbols[i] << std::endl; + } + free(symbols); + std::cerr << std::flush; + sleep(2); + + throw e; } } From 34905083e25ac1d6375ca4d513d61b9224030cc0 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 1 Sep 2024 04:19:08 +0300 Subject: [PATCH 70/97] Include execinfo header. Print test file --- test/src/pdf2htmlEX_wrapper_test.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp index c47acaf9..dbf1bfba 100644 --- a/test/src/pdf2htmlEX_wrapper_test.cpp +++ b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -62,7 +63,7 @@ TEST_P(pdf2htmlEXWrapperTests, html) { EXPECT_LT(0, fs::file_size(html_page.path)); } } catch (const std::exception & e) { - std::cerr << e.what() << std::endl << std::flush; + std::cerr << test_file.path << std::endl << e.what() << std::endl << std::flush; void *array[10]; int size = backtrace(array, 10); From 2d751a2cea981bf4cfecd0132a429efc32390814 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 1 Sep 2024 04:20:19 +0300 Subject: [PATCH 71/97] print stacktrace only on gcc --- test/src/pdf2htmlEX_wrapper_test.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp index dbf1bfba..d6fcb639 100644 --- a/test/src/pdf2htmlEX_wrapper_test.cpp +++ b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -1,7 +1,9 @@ #include #include #include +#ifdef __GNUC__ #include +#endif #include #include @@ -65,6 +67,7 @@ TEST_P(pdf2htmlEXWrapperTests, html) { } catch (const std::exception & e) { std::cerr << test_file.path << std::endl << e.what() << std::endl << std::flush; +#ifdef __GNUC__ void *array[10]; int size = backtrace(array, 10); char ** symbols = backtrace_symbols(array, size); @@ -74,6 +77,7 @@ 
TEST_P(pdf2htmlEXWrapperTests, html) { free(symbols); std::cerr << std::flush; sleep(2); +#endif throw e; } From 351180141e840ed69097b555defb9303fa7a8fb4 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 1 Sep 2024 04:21:09 +0300 Subject: [PATCH 72/97] Format --- test/src/pdf2htmlEX_wrapper_test.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp index d6fcb639..746965ed 100644 --- a/test/src/pdf2htmlEX_wrapper_test.cpp +++ b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -64,13 +64,15 @@ TEST_P(pdf2htmlEXWrapperTests, html) { EXPECT_TRUE(fs::is_regular_file(html_page.path)); EXPECT_LT(0, fs::file_size(html_page.path)); } - } catch (const std::exception & e) { - std::cerr << test_file.path << std::endl << e.what() << std::endl << std::flush; + } catch (const std::exception &e) { + std::cerr << test_file.path << std::endl + << e.what() << std::endl + << std::flush; #ifdef __GNUC__ void *array[10]; int size = backtrace(array, 10); - char ** symbols = backtrace_symbols(array, size); + char **symbols = backtrace_symbols(array, size); for (int i = 0; i < size; i++) { std::cerr << symbols[i] << std::endl; } From 76375fb298ec471b511229c0c568c11abe8f0a65 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 1 Sep 2024 04:35:14 +0300 Subject: [PATCH 73/97] Add extra test output --- test/src/pdf2htmlEX_wrapper_test.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp index 746965ed..0afccb94 100644 --- a/test/src/pdf2htmlEX_wrapper_test.cpp +++ b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -83,6 +83,7 @@ TEST_P(pdf2htmlEXWrapperTests, html) { throw e; } + std::cerr << "End of test" << std::endl << std::flush; } INSTANTIATE_TEST_SUITE_P(all_test_files, pdf2htmlEXWrapperTests, From a38b9baef3b107a4bb33deb2300996260b46d830 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 1 Sep 2024 
04:35:44 +0300 Subject: [PATCH 74/97] Print stacktrace on clang too --- test/src/pdf2htmlEX_wrapper_test.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp index 0afccb94..e7c27b07 100644 --- a/test/src/pdf2htmlEX_wrapper_test.cpp +++ b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -1,9 +1,7 @@ #include #include #include -#ifdef __GNUC__ #include -#endif #include #include @@ -69,7 +67,6 @@ TEST_P(pdf2htmlEXWrapperTests, html) { << e.what() << std::endl << std::flush; -#ifdef __GNUC__ void *array[10]; int size = backtrace(array, 10); char **symbols = backtrace_symbols(array, size); @@ -79,7 +76,6 @@ TEST_P(pdf2htmlEXWrapperTests, html) { free(symbols); std::cerr << std::flush; sleep(2); -#endif throw e; } From e4c2fffcc2bcb5266c89eacd67d49ae03c4c4850 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 1 Sep 2024 04:36:29 +0300 Subject: [PATCH 75/97] Temporarily disable macos and msvc tests --- .github/workflows/build_test.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index 671cfbbb..3db6dc6b 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -20,9 +20,9 @@ jobs: include: - { os: ubuntu-24.04, compiler: clang-18 } - { os: ubuntu-24.04, compiler: gcc-14 } - - { os: macos-13, compiler: clang-14 } - - { os: macos-14, compiler: armv8-clang-14 } - - { os: windows-2022, compiler: msvc-1940 } +# - { os: macos-13, compiler: clang-14 } +# - { os: macos-14, compiler: armv8-clang-14 } +# - { os: windows-2022, compiler: msvc-1940 } steps: - name: checkout uses: actions/checkout@v4 @@ -172,8 +172,8 @@ jobs: include: - { os: ubuntu-24.04, compiler: clang-18 } - { os: ubuntu-24.04, compiler: gcc-14 } - - { os: macos-13, compiler: clang-14 } - - { os: macos-14, compiler: armv8-clang-14 } +# - { os: macos-13, compiler: clang-14 } +# - { os: macos-14, compiler: 
armv8-clang-14 } # Windows test disabled because: # Running main() from C:\Users\runneradmin\.conan2\p\b\gtestdd9407d368b89\b\src\googletest\src\gtest_main.cc # [ FATAL ] C:/Users/runneradmin/.conan2/p/gtest28fa6787e7f6e/p/include\gtest/internal/gtest-param-util.h(585):: Condition IsValidParamName(param_name) failed. Parameterized test name 'odr_private\docx\03_smpldap_docx' is invalid, in D:\a\OpenDocument.core\OpenDocument.core\test\src\html_output_test.cpp line 129 From 18946869a2ed2a95634730daa6dcd82bdf22eaea Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Sun, 1 Sep 2024 04:39:56 +0300 Subject: [PATCH 76/97] formatting --- test/src/pdf2htmlEX_wrapper_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp index e7c27b07..c21aa57b 100644 --- a/test/src/pdf2htmlEX_wrapper_test.cpp +++ b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -1,7 +1,7 @@ +#include #include #include #include -#include #include #include From dd67e284a86a5ae9defbb61c708f88d731aa069a Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Mon, 2 Sep 2024 00:27:55 +0300 Subject: [PATCH 77/97] Temporarily disable build-test-downstream and docker --- .github/workflows/build_test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index 3db6dc6b..aadd3ea0 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -107,6 +107,7 @@ jobs: build/test/Release/odr_test.exe docker: + if: false needs: build runs-on: ${{ matrix.os }} strategy: @@ -242,6 +243,7 @@ jobs: build/test/output/odr-private/output build-test-downstream: + if: false runs-on: ${{ matrix.os }} strategy: fail-fast: false From 5ed5d73cdb5625d4e017c4e81cfa8214f6f5dfe9 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Mon, 2 Sep 2024 00:31:38 +0300 Subject: [PATCH 78/97] Add more instrumentation --- test/src/pdf2htmlEX_wrapper_test.cpp | 5 
+++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp index c21aa57b..2eae98ac 100644 --- a/test/src/pdf2htmlEX_wrapper_test.cpp +++ b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -54,16 +54,17 @@ TEST_P(pdf2htmlEXWrapperTests, html) { password = "sample-user-password"; } + std::cout << "Calling pdf2htmlEX_wrapper" << std::endl << std::flush; try { Html html = odr::internal::html::pdf2htmlEX_wrapper( test_file.path, output_path, config, password); - + std::cout << "Returned from pdf2htmlEX_wrapper" << std::endl << std::flush; for (const HtmlPage &html_page : html.pages()) { EXPECT_TRUE(fs::is_regular_file(html_page.path)); EXPECT_LT(0, fs::file_size(html_page.path)); } } catch (const std::exception &e) { - std::cerr << test_file.path << std::endl + std::cerr << "Exception in pdf2htmlEX_wrapper: " << std::endl << e.what() << std::endl << std::flush; From aa4b752e1c9492c32edf0d7ea861bcfdd0a02e0a Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Mon, 2 Sep 2024 01:50:32 +0300 Subject: [PATCH 79/97] Reenable tests --- .github/workflows/build_test.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index aadd3ea0..676a0655 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -20,9 +20,9 @@ jobs: include: - { os: ubuntu-24.04, compiler: clang-18 } - { os: ubuntu-24.04, compiler: gcc-14 } -# - { os: macos-13, compiler: clang-14 } -# - { os: macos-14, compiler: armv8-clang-14 } -# - { os: windows-2022, compiler: msvc-1940 } + - { os: macos-13, compiler: clang-14 } + - { os: macos-14, compiler: armv8-clang-14 } + - { os: windows-2022, compiler: msvc-1940 } steps: - name: checkout uses: actions/checkout@v4 @@ -107,7 +107,6 @@ jobs: build/test/Release/odr_test.exe docker: - if: false needs: build runs-on: ${{ matrix.os }} strategy: @@ -243,7 +242,6 @@ jobs: 
build/test/output/odr-private/output build-test-downstream: - if: false runs-on: ${{ matrix.os }} strategy: fail-fast: false From 64cec0637557215b66276212ad811e1701611738 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Mon, 2 Sep 2024 01:59:48 +0300 Subject: [PATCH 80/97] [2/2] reenable tests --- .github/workflows/build_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index e3a43b1b..02716112 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -164,8 +164,8 @@ jobs: include: - { os: ubuntu-24.04, compiler: clang-18 } - { os: ubuntu-24.04, compiler: gcc-14 } -# - { os: macos-13, compiler: clang-14 } -# - { os: macos-14, compiler: armv8-clang-14 } + - { os: macos-13, compiler: clang-14 } + - { os: macos-14, compiler: armv8-clang-14 } # Windows test disabled because: # Running main() from C:\Users\runneradmin\.conan2\p\b\gtestdd9407d368b89\b\src\googletest\src\gtest_main.cc # [ FATAL ] C:/Users/runneradmin/.conan2/p/gtest28fa6787e7f6e/p/include\gtest/internal/gtest-param-util.h(585):: Condition IsValidParamName(param_name) failed. 
Parameterized test name 'odr_private\docx\03_smpldap_docx' is invalid, in D:\a\OpenDocument.core\OpenDocument.core\test\src\html_output_test.cpp line 129 From 7706a94b589321712480ad5c1bdbeb86e5521fee Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Mon, 2 Sep 2024 02:05:47 +0300 Subject: [PATCH 81/97] Add more instrumentation --- src/odr/internal/html/pdf2htmlEX_wrapper.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp index c85a9052..28642d7a 100644 --- a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp +++ b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp @@ -17,18 +17,21 @@ static void ensure_env_vars() { if (nullptr == pdf2htmlEX_data_dir) { pdf2htmlEX_data_dir = PDF2HTMLEX_DATA_DIR; setenv("PDF2HTMLEX_DATA_DIR", pdf2htmlEX_data_dir, 0); + std::cout << "PDF2HTMLEX_DATA_DIR set to " << getenv("PDF2HTMLEX_DATA_DIR") << std::endl; } static const char *poppler_data_dir = getenv("POPPLER_DATA_DIR"); if (nullptr == poppler_data_dir) { poppler_data_dir = POPPLER_DATA_DIR; setenv("POPPLER_DATA_DIR", poppler_data_dir, 0); + std::cout << "POPPLER_DATA_DIR set to " << getenv("POPPLER_DATA_DIR") << std::endl; } static const char *fontconfig_path = getenv("FONTCONFIG_PATH"); if (nullptr == fontconfig_path) { fontconfig_path = FONTCONFIG_PATH; setenv("FONTCONFIG_PATH", fontconfig_path, 0); + std::cout << "FONTCONFIG_PATH set to " << getenv("FONTCONFIG_PATH") << std::endl; } } From 50bbb4d63470d92bf0d1dada2f66104cda606688 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Mon, 2 Sep 2024 02:06:38 +0300 Subject: [PATCH 82/97] Format --- src/odr/internal/html/pdf2htmlEX_wrapper.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp index 28642d7a..31046e93 100644 --- a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp +++ b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp 
@@ -17,21 +17,24 @@ static void ensure_env_vars() { if (nullptr == pdf2htmlEX_data_dir) { pdf2htmlEX_data_dir = PDF2HTMLEX_DATA_DIR; setenv("PDF2HTMLEX_DATA_DIR", pdf2htmlEX_data_dir, 0); - std::cout << "PDF2HTMLEX_DATA_DIR set to " << getenv("PDF2HTMLEX_DATA_DIR") << std::endl; + std::cout << "PDF2HTMLEX_DATA_DIR set to " << getenv("PDF2HTMLEX_DATA_DIR") + << std::endl; } static const char *poppler_data_dir = getenv("POPPLER_DATA_DIR"); if (nullptr == poppler_data_dir) { poppler_data_dir = POPPLER_DATA_DIR; setenv("POPPLER_DATA_DIR", poppler_data_dir, 0); - std::cout << "POPPLER_DATA_DIR set to " << getenv("POPPLER_DATA_DIR") << std::endl; + std::cout << "POPPLER_DATA_DIR set to " << getenv("POPPLER_DATA_DIR") + << std::endl; } static const char *fontconfig_path = getenv("FONTCONFIG_PATH"); if (nullptr == fontconfig_path) { fontconfig_path = FONTCONFIG_PATH; setenv("FONTCONFIG_PATH", fontconfig_path, 0); - std::cout << "FONTCONFIG_PATH set to " << getenv("FONTCONFIG_PATH") << std::endl; + std::cout << "FONTCONFIG_PATH set to " << getenv("FONTCONFIG_PATH") + << std::endl; } } From 4e99ad4151cb8c80b83c2843e034c85993de64ce Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Mon, 2 Sep 2024 02:48:17 +0300 Subject: [PATCH 83/97] Update test filtering --- test/src/pdf2htmlEX_wrapper_test.cpp | 10 +++------- test/src/test_util.cpp | 15 +++++++++++++++ test/src/test_util.hpp | 2 ++ test/src/wvWare_wrapper_test.cpp | 10 +++------- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp index 2eae98ac..8aaf123e 100644 --- a/test/src/pdf2htmlEX_wrapper_test.cpp +++ b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -37,11 +37,6 @@ TEST_P(pdf2htmlEXWrapperTests, html) { std::cout << test_file.path << " to " << output_path << std::endl; - if (!util::string::ends_with(test_file.path, ".pdf") && - test_file.type != FileType::portable_document_format) { - GTEST_SKIP(); - } - 
fs::create_directories(output_path); HtmlConfig config; std::optional password; @@ -83,8 +78,9 @@ TEST_P(pdf2htmlEXWrapperTests, html) { std::cerr << "End of test" << std::endl << std::flush; } -INSTANTIATE_TEST_SUITE_P(all_test_files, pdf2htmlEXWrapperTests, - testing::ValuesIn(TestData::test_file_paths()), +INSTANTIATE_TEST_SUITE_P(pdf2htmlEX_test_files, pdf2htmlEXWrapperTests, + testing::ValuesIn(TestData::test_file_paths( + FileType::portable_document_format)), [](const ::testing::TestParamInfo &info) { std::string path = info.param; internal::util::string::replace_all(path, "/", "_"); diff --git a/test/src/test_util.cpp b/test/src/test_util.cpp index ca9b9e91..764ad4ea 100644 --- a/test/src/test_util.cpp +++ b/test/src/test_util.cpp @@ -122,6 +122,10 @@ std::vector TestData::test_file_paths() { return instance_().test_file_paths_(); } +std::vector TestData::test_file_paths(FileType fileType) { + return instance_().test_file_paths_(fileType); +} + TestFile TestData::test_file(const std::string &path) { return instance_().test_file_(path); } @@ -141,6 +145,17 @@ std::vector TestData::test_file_paths_() const { return result; } +std::vector TestData::test_file_paths_(FileType fileType) const { + std::vector result; + for (auto &&file : m_test_files) { + if (file.second.type == fileType) { + result.push_back(file.first); + } + } + std::sort(std::begin(result), std::end(result)); + return result; +} + TestFile TestData::test_file_(const std::string &path) const { return m_test_files.at(path); } diff --git a/test/src/test_util.hpp b/test/src/test_util.hpp index e13ac160..324f76c4 100644 --- a/test/src/test_util.hpp +++ b/test/src/test_util.hpp @@ -26,6 +26,7 @@ class TestData { static std::string data_input_directory(); static std::vector test_file_paths(); + static std::vector test_file_paths(FileType); static TestFile test_file(const std::string &path); static std::string test_file_path(const std::string &path); @@ -39,6 +40,7 @@ class TestData { static TestData 
&instance_(); std::vector test_file_paths_() const; + std::vector test_file_paths_(FileType) const; TestFile test_file_(const std::string &path) const; std::unordered_map m_test_files; diff --git a/test/src/wvWare_wrapper_test.cpp b/test/src/wvWare_wrapper_test.cpp index ae6f9899..17fc1ba0 100644 --- a/test/src/wvWare_wrapper_test.cpp +++ b/test/src/wvWare_wrapper_test.cpp @@ -36,11 +36,6 @@ TEST_P(wvWareWrapperTests, html) { std::cout << test_file.path << " to " << output_path << std::endl; - if (!util::string::ends_with(test_file.path, ".doc") && - test_file.type != FileType::legacy_word_document) { - GTEST_SKIP(); - } - // Password protected files are problematic on wvWare if (test_file.password_encrypted) { GTEST_SKIP(); @@ -58,8 +53,9 @@ TEST_P(wvWareWrapperTests, html) { } } -INSTANTIATE_TEST_SUITE_P(all_test_files, wvWareWrapperTests, - testing::ValuesIn(TestData::test_file_paths()), +INSTANTIATE_TEST_SUITE_P(wvWare_test_files, wvWareWrapperTests, + testing::ValuesIn(TestData::test_file_paths( + FileType::legacy_word_document)), [](const ::testing::TestParamInfo &info) { std::string path = info.param; internal::util::string::replace_all(path, "/", "_"); From aa8e3f8d41d2a051a6041ead047ab1f08e9f7923 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Mon, 2 Sep 2024 23:56:53 +0300 Subject: [PATCH 84/97] Artifact and download .conan2 dir. 
Test job needs this --- .github/workflows/build_test.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index 02716112..f4c4cae6 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -98,6 +98,13 @@ jobs: build/test/odr_test build/test/Release/odr_test.exe + - name: Artifact .conan2/p dir + uses: actions/upload-artifact@v4 + with: + name: conan2-${{ matrix.os }}-${{ matrix.compiler }} + path: ~/.conan2/p + if-no-files-found: error + docker: needs: build runs-on: ${{ matrix.os }} @@ -197,6 +204,12 @@ jobs: name: bin-${{ matrix.os }}-${{ matrix.compiler }} path: . + - name: Download .conan2/p dir + uses: actions/download-artifact@v4 + with: + name: conan2-${{ matrix.os }}-${{ matrix.compiler }} + path: ~/.conan2/p + - name: fix artifact permissions if: runner.os != 'Windows' run: chmod +x build/test/odr_test From a21fb294701f1428a5a13cfafc46420ebefacc7b Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Tue, 3 Sep 2024 00:29:10 +0300 Subject: [PATCH 85/97] test speed of compression levels --- .github/workflows/build_test.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index f4c4cae6..cba575dd 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -105,6 +105,22 @@ jobs: path: ~/.conan2/p if-no-files-found: error + - name: Artifact .conan2/p dir - comp 1 + uses: actions/upload-artifact@v4 + with: + name: conan2-1-${{ matrix.os }}-${{ matrix.compiler }} + path: ~/.conan2/p + if-no-files-found: error + compression-level: 1 + + - name: Artifact .conan2/p dir - comp 9 + uses: actions/upload-artifact@v4 + with: + name: conan2-9-${{ matrix.os }}-${{ matrix.compiler }} + path: ~/.conan2/p + if-no-files-found: error + compression-level: 9 + docker: needs: build runs-on: ${{ matrix.os }} From d23ae95917d53fad97bd28141a0f19c3369f9ce3 
Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Tue, 3 Sep 2024 00:36:40 +0300 Subject: [PATCH 86/97] Set artifact compression level to 0 --- .github/workflows/build_test.yml | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index cba575dd..a513eb37 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -97,6 +97,7 @@ jobs: install build/test/odr_test build/test/Release/odr_test.exe + compression-level: 0 - name: Artifact .conan2/p dir uses: actions/upload-artifact@v4 @@ -104,22 +105,7 @@ jobs: name: conan2-${{ matrix.os }}-${{ matrix.compiler }} path: ~/.conan2/p if-no-files-found: error - - - name: Artifact .conan2/p dir - comp 1 - uses: actions/upload-artifact@v4 - with: - name: conan2-1-${{ matrix.os }}-${{ matrix.compiler }} - path: ~/.conan2/p - if-no-files-found: error - compression-level: 1 - - - name: Artifact .conan2/p dir - comp 9 - uses: actions/upload-artifact@v4 - with: - name: conan2-9-${{ matrix.os }}-${{ matrix.compiler }} - path: ~/.conan2/p - if-no-files-found: error - compression-level: 9 + compression-level: 0 docker: needs: build From 06903d069d06f11d8a669152c7998402c8a84981 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Tue, 3 Sep 2024 01:32:32 +0300 Subject: [PATCH 87/97] Keep default compression level for first artifact --- .github/workflows/build_test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index a513eb37..38b52ce9 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -97,7 +97,6 @@ jobs: install build/test/odr_test build/test/Release/odr_test.exe - compression-level: 0 - name: Artifact .conan2/p dir uses: actions/upload-artifact@v4 From 9c7b4ab79d8fd5c09b88ad45dd7b9a495f9b26fe Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Tue, 3 Sep 2024 01:33:18 +0300 Subject: [PATCH 
88/97] Remove previously added rdynamic cxxflag and exception stacktrace printer --- CMakeLists.txt | 2 +- test/src/pdf2htmlEX_wrapper_test.cpp | 33 +++++----------------------- 2 files changed, 7 insertions(+), 28 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f0eec36..ce033240 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR # using clang or gcc set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g -rdynamic") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") # debugging #set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_GLIBCXX_DEBUG") diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp index 8aaf123e..4ae993b1 100644 --- a/test/src/pdf2htmlEX_wrapper_test.cpp +++ b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -49,33 +48,13 @@ TEST_P(pdf2htmlEXWrapperTests, html) { password = "sample-user-password"; } - std::cout << "Calling pdf2htmlEX_wrapper" << std::endl << std::flush; - try { - Html html = odr::internal::html::pdf2htmlEX_wrapper( - test_file.path, output_path, config, password); - std::cout << "Returned from pdf2htmlEX_wrapper" << std::endl << std::flush; - for (const HtmlPage &html_page : html.pages()) { - EXPECT_TRUE(fs::is_regular_file(html_page.path)); - EXPECT_LT(0, fs::file_size(html_page.path)); - } - } catch (const std::exception &e) { - std::cerr << "Exception in pdf2htmlEX_wrapper: " << std::endl - << e.what() << std::endl - << std::flush; - - void *array[10]; - int size = backtrace(array, 10); - char **symbols = backtrace_symbols(array, size); - for (int i = 0; i < size; i++) { - std::cerr << symbols[i] << std::endl; - } - free(symbols); - std::cerr << std::flush; - sleep(2); - - throw e; + Html html = odr::internal::html::pdf2htmlEX_wrapper( 
+ test_file.path, output_path, config, password); + std::cout << "Returned from pdf2htmlEX_wrapper" << std::endl << std::flush; + for (const HtmlPage &html_page : html.pages()) { + EXPECT_TRUE(fs::is_regular_file(html_page.path)); + EXPECT_LT(0, fs::file_size(html_page.path)); } - std::cerr << "End of test" << std::endl << std::flush; } INSTANTIATE_TEST_SUITE_P(pdf2htmlEX_test_files, pdf2htmlEXWrapperTests, From 8758438aea5fa86deb7e06b76be4b6252e107a61 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Tue, 3 Sep 2024 17:00:53 +0300 Subject: [PATCH 89/97] Update submodules --- test/data/input/odr-private | 2 +- test/data/reference-output/odr-private | 2 +- test/data/reference-output/odr-public | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/data/input/odr-private b/test/data/input/odr-private index 508550b9..a997171b 160000 --- a/test/data/input/odr-private +++ b/test/data/input/odr-private @@ -1 +1 @@ -Subproject commit 508550b99ed8f2300b33baba0219468ea1ba4c5d +Subproject commit a997171b727f230c4a81421d43e2ed62f37b94ca diff --git a/test/data/reference-output/odr-private b/test/data/reference-output/odr-private index 1b54e452..e3b3a585 160000 --- a/test/data/reference-output/odr-private +++ b/test/data/reference-output/odr-private @@ -1 +1 @@ -Subproject commit 1b54e452350216edfe09dfd697af002add29fa87 +Subproject commit e3b3a585799191815b8eca12b816676ee180170e diff --git a/test/data/reference-output/odr-public b/test/data/reference-output/odr-public index 6138deea..85104973 160000 --- a/test/data/reference-output/odr-public +++ b/test/data/reference-output/odr-public @@ -1 +1 @@ -Subproject commit 6138deea822cc17e940181fe3d99b6b6aef64551 +Subproject commit 851049738a6d3063d6f4e3b4eedd03e6063ce1d2 From afbac96eaf9bc4211724fe621b2e6acbc534dadf Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Tue, 3 Sep 2024 17:16:24 +0300 Subject: [PATCH 90/97] Remove pdf2htmlEX and wvWare from reference outputs, when running tests on NOT linux ---
.github/workflows/build_test.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index 38b52ce9..b976ed16 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -185,6 +185,18 @@ jobs: token: ${{ secrets.PAT_ANDIWAND }} submodules: true + # @TODO: Solve this somehow better + # pdf2htmlEX and wvWare is available only on Linux (and Android) + # This means that odr_test will not produce outputs pdf2htmlEX and wvWare outputs to match what's in + # reference-outputs directory. Remove these from reference outputs, for now. + - name: Remove pdf2htmlEX and wvWare from reference outputs + if: runner.os != 'Linux' + run: | + rm -r \ + test/data/reference-output/odr-private/output/pdf2htmlEX \ + test/data/reference-output/odr-public/output/pdf2htmlEX \ + test/data/reference-output/odr-public/output/wvWare + - name: ubuntu install tidy if: runner.os == 'Linux' run: sudo apt install tidy From 5ea4cc1ff3712c46cfceab0192b4363c8c051905 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Tue, 3 Sep 2024 17:25:37 +0300 Subject: [PATCH 91/97] Reduce instrumentation in pdf2htmlEX_wrapper and _test --- src/odr/internal/html/pdf2htmlEX_wrapper.cpp | 6 ------ test/src/pdf2htmlEX_wrapper_test.cpp | 1 - 2 files changed, 7 deletions(-) diff --git a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp index 31046e93..c85a9052 100644 --- a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp +++ b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp @@ -17,24 +17,18 @@ static void ensure_env_vars() { if (nullptr == pdf2htmlEX_data_dir) { pdf2htmlEX_data_dir = PDF2HTMLEX_DATA_DIR; setenv("PDF2HTMLEX_DATA_DIR", pdf2htmlEX_data_dir, 0); - std::cout << "PDF2HTMLEX_DATA_DIR set to " << getenv("PDF2HTMLEX_DATA_DIR") - << std::endl; } static const char *poppler_data_dir = getenv("POPPLER_DATA_DIR"); if (nullptr == poppler_data_dir) { poppler_data_dir = 
POPPLER_DATA_DIR; setenv("POPPLER_DATA_DIR", poppler_data_dir, 0); - std::cout << "POPPLER_DATA_DIR set to " << getenv("POPPLER_DATA_DIR") - << std::endl; } static const char *fontconfig_path = getenv("FONTCONFIG_PATH"); if (nullptr == fontconfig_path) { fontconfig_path = FONTCONFIG_PATH; setenv("FONTCONFIG_PATH", fontconfig_path, 0); - std::cout << "FONTCONFIG_PATH set to " << getenv("FONTCONFIG_PATH") - << std::endl; } } diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp index 4ae993b1..9287d31d 100644 --- a/test/src/pdf2htmlEX_wrapper_test.cpp +++ b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -50,7 +50,6 @@ TEST_P(pdf2htmlEXWrapperTests, html) { Html html = odr::internal::html::pdf2htmlEX_wrapper( test_file.path, output_path, config, password); - std::cout << "Returned from pdf2htmlEX_wrapper" << std::endl << std::flush; for (const HtmlPage &html_page : html.pages()) { EXPECT_TRUE(fs::is_regular_file(html_page.path)); EXPECT_LT(0, fs::file_size(html_page.path)); From 65c3c9bb11f8e2b2cdb4da21f550fbaeace5bd01 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Wed, 4 Sep 2024 18:32:57 +0300 Subject: [PATCH 92/97] Call target_sources instead of appending source files --- CMakeLists.txt | 12 ++---------- test/CMakeLists.txt | 14 ++------------ 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ce033240..d804a522 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -175,16 +175,6 @@ set(ODR_SOURCE_FILES "src/odr/internal/zip/zip_file.cpp" "src/odr/internal/zip/zip_util.cpp" ) -if(WITH_PDF2HTMLEX) - LIST(APPEND ODR_SOURCE_FILES - "src/odr/internal/html/pdf2htmlEX_wrapper.cpp" - ) -endif(WITH_PDF2HTMLEX) -if(WITH_WVWARE) - LIST(APPEND ODR_SOURCE_FILES - "src/odr/internal/html/wvWare_wrapper.cpp" - ) -endif(WITH_WVWARE) add_library(odr ${ODR_SOURCE_FILES}) set_target_properties(odr PROPERTIES OUTPUT_NAME odr) @@ -205,10 +195,12 @@ target_link_libraries(odr ) if(WITH_PDF2HTMLEX) + 
target_sources(odr PRIVATE "src/odr/internal/html/pdf2htmlEX_wrapper.cpp") find_package(pdf2htmlEX REQUIRED) target_link_libraries(odr PRIVATE pdf2htmlex::pdf2htmlex) endif(WITH_PDF2HTMLEX) if(WITH_WVWARE) + target_sources(odr PRIVATE "src/odr/internal/html/wvWare_wrapper.cpp") find_package(wvware REQUIRED) target_link_libraries(odr PRIVATE wvware::wvware) endif(WITH_WVWARE) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4b4fddfe..71e50e56 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -7,16 +7,6 @@ configure_file("src/test_constants.cpp.in" "src/test_constants.cpp") enable_testing() -if(WITH_PDF2HTMLEX) - LIST(APPEND ODR_TEST_SOURCE_FILES - "src/pdf2htmlEX_wrapper_test.cpp" - ) -endif(WITH_PDF2HTMLEX) -if(WITH_WVWARE) - LIST(APPEND ODR_TEST_SOURCE_FILES - "src/wvWare_wrapper_test.cpp" - ) -endif(WITH_WVWARE) add_executable(odr_test "src/test_util.cpp" "${CMAKE_CURRENT_BINARY_DIR}/src/test_constants.cpp" @@ -54,8 +44,6 @@ add_executable(odr_test "src/internal/zip/miniz_test.cpp" "src/internal/zip/zip_archive_test.cpp" - - ${ODR_TEST_SOURCE_FILES} ) target_include_directories(odr_test PRIVATE @@ -77,9 +65,11 @@ target_link_libraries(odr_test ) if(WITH_PDF2HTMLEX) + target_sources(odr_test PRIVATE "src/pdf2htmlEX_wrapper_test.cpp") target_link_libraries(odr_test PRIVATE pdf2htmlex::pdf2htmlex) endif(WITH_PDF2HTMLEX) if(WITH_WVWARE) + target_sources(odr_test PRIVATE "src/wvWare_wrapper_test.cpp") target_link_libraries(odr_test PRIVATE wvware::wvware) endif(WITH_WVWARE) From 0f694698c64fdca8c07544153be49272bcadedba Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Wed, 4 Sep 2024 22:45:48 +0300 Subject: [PATCH 93/97] Move pdf2htmlEX and wvWare test output to output-pdf2htmlEX and output-wvWare --- .github/workflows/build_test.yml | 48 +++++++++++++++++++------- test/data/reference-output/odr-private | 2 +- test/data/reference-output/odr-public | 2 +- test/src/pdf2htmlEX_wrapper_test.cpp | 3 +- test/src/wvWare_wrapper_test.cpp | 3 +- 5 
files changed, 40 insertions(+), 18 deletions(-) diff --git a/.github/workflows/build_test.yml b/.github/workflows/build_test.yml index b976ed16..2bc94e40 100644 --- a/.github/workflows/build_test.yml +++ b/.github/workflows/build_test.yml @@ -185,18 +185,6 @@ jobs: token: ${{ secrets.PAT_ANDIWAND }} submodules: true - # @TODO: Solve this somehow better - # pdf2htmlEX and wvWare is available only on Linux (and Android) - # This means that odr_test will not produce outputs pdf2htmlEX and wvWare outputs to match what's in - # reference-outputs directory. Remove these from reference outputs, for now. - - name: Remove pdf2htmlEX and wvWare from reference outputs - if: runner.os != 'Linux' - run: | - rm -r \ - test/data/reference-output/odr-private/output/pdf2htmlEX \ - test/data/reference-output/odr-public/output/pdf2htmlEX \ - test/data/reference-output/odr-public/output/wvWare - - name: ubuntu install tidy if: runner.os == 'Linux' run: sudo apt install tidy @@ -259,6 +247,42 @@ jobs: test/data/reference-output/odr-private/output \ build/test/output/odr-private/output + - name: tidy pdf2htmlEX test outputs + if: runner.os == 'Linux' + run: | + python3 -u test/scripts/tidy_output.py build/test/output/odr-public/output-pdf2htmlEX + python3 -u test/scripts/tidy_output.py build/test/output/odr-private/output-pdf2htmlEX + - name: Compare pdf2htmlEX public test results + if: runner.os == 'Linux' + run: | + python3 -u test/scripts/compare_output.py \ + --driver firefox \ + --max-workers 1 \ + test/data/reference-output/odr-public/output-pdf2htmlEX \ + build/test/output/odr-public/output-pdf2htmlEX + - name: Compare pdf2htmlEX private test results + if: runner.os == 'Linux' + run: | + python3 -u test/scripts/compare_output.py \ + --driver firefox \ + --max-workers 1 \ + test/data/reference-output/odr-private/output-pdf2htmlEX \ + build/test/output/odr-private/output-pdf2htmlEX + + # wvWare has no private test data + - name: tidy wvWare test outputs + if: runner.os == 'Linux' +
run: | + python3 -u test/scripts/tidy_output.py build/test/output/odr-public/output-wvWare + - name: Compare wvWare public test results + if: runner.os == 'Linux' + run: | + python3 -u test/scripts/compare_output.py \ + --driver firefox \ + --max-workers 1 \ + test/data/reference-output/odr-public/output-wvWare \ + build/test/output/odr-public/output-wvWare + build-test-downstream: runs-on: ${{ matrix.os }} strategy: diff --git a/test/data/reference-output/odr-private b/test/data/reference-output/odr-private index e3b3a585..b1d06179 160000 --- a/test/data/reference-output/odr-private +++ b/test/data/reference-output/odr-private @@ -1 +1 @@ -Subproject commit e3b3a585799191815b8eca12b816676ee180170e +Subproject commit b1d061790ee59b5ded4c3b970dd0a5c453d65b96 diff --git a/test/data/reference-output/odr-public b/test/data/reference-output/odr-public index 85104973..c3b3d0b1 160000 --- a/test/data/reference-output/odr-public +++ b/test/data/reference-output/odr-public @@ -1 +1 @@ -Subproject commit 851049738a6d3063d6f4e3b4eedd03e6063ce1d2 +Subproject commit c3b3d0b160c4bb34ee3ca9b7e61cff504335cbc5 diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp index 9287d31d..b2fbb863 100644 --- a/test/src/pdf2htmlEX_wrapper_test.cpp +++ b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -26,8 +26,7 @@ TEST_P(pdf2htmlEXWrapperTests, html) { const std::string test_repo = *common::Path(test_file_path).begin(); const std::string output_path_prefix = common::Path("output") .join(test_repo) - .join("output") - .join("pdf2htmlEX") + .join("output-pdf2htmlEX") .string(); const std::string output_path = common::Path(output_path_prefix) diff --git a/test/src/wvWare_wrapper_test.cpp b/test/src/wvWare_wrapper_test.cpp index 17fc1ba0..43b87fce 100644 --- a/test/src/wvWare_wrapper_test.cpp +++ b/test/src/wvWare_wrapper_test.cpp @@ -26,8 +26,7 @@ TEST_P(wvWareWrapperTests, html) { const std::string test_repo = *common::Path(test_file_path).begin(); const std::string 
output_path_prefix = common::Path("output") .join(test_repo) - .join("output") - .join("wvWare") + .join("output-wvWare") .string(); const std::string output_path = common::Path(output_path_prefix) From a744ecdcf1793ed4232b547de8940232aac368cc Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Wed, 4 Sep 2024 22:48:57 +0300 Subject: [PATCH 94/97] Remove extra empty line that was previously added --- test/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 71e50e56..e8f623e9 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -6,7 +6,6 @@ set(ODR_TEST_DATA_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/data") configure_file("src/test_constants.cpp.in" "src/test_constants.cpp") enable_testing() - add_executable(odr_test "src/test_util.cpp" "${CMAKE_CURRENT_BINARY_DIR}/src/test_constants.cpp" From 637105e7cbb5a7e5a3880a88e4fb479a4af53588 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Wed, 4 Sep 2024 22:54:10 +0300 Subject: [PATCH 95/97] Formatting --- test/src/pdf2htmlEX_wrapper_test.cpp | 6 ++---- test/src/wvWare_wrapper_test.cpp | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/test/src/pdf2htmlEX_wrapper_test.cpp b/test/src/pdf2htmlEX_wrapper_test.cpp index b2fbb863..e29c82c3 100644 --- a/test/src/pdf2htmlEX_wrapper_test.cpp +++ b/test/src/pdf2htmlEX_wrapper_test.cpp @@ -24,10 +24,8 @@ TEST_P(pdf2htmlEXWrapperTests, html) { const TestFile test_file = TestData::test_file(test_file_path); const std::string test_repo = *common::Path(test_file_path).begin(); - const std::string output_path_prefix = common::Path("output") - .join(test_repo) - .join("output-pdf2htmlEX") - .string(); + const std::string output_path_prefix = + common::Path("output").join(test_repo).join("output-pdf2htmlEX").string(); const std::string output_path = common::Path(output_path_prefix) .join(common::Path(test_file_path).rebase(test_repo)) diff --git a/test/src/wvWare_wrapper_test.cpp 
b/test/src/wvWare_wrapper_test.cpp index 43b87fce..d3d45252 100644 --- a/test/src/wvWare_wrapper_test.cpp +++ b/test/src/wvWare_wrapper_test.cpp @@ -24,10 +24,8 @@ TEST_P(wvWareWrapperTests, html) { const TestFile test_file = TestData::test_file(test_file_path); const std::string test_repo = *common::Path(test_file_path).begin(); - const std::string output_path_prefix = common::Path("output") - .join(test_repo) - .join("output-wvWare") - .string(); + const std::string output_path_prefix = + common::Path("output").join(test_repo).join("output-wvWare").string(); const std::string output_path = common::Path(output_path_prefix) .join(common::Path(test_file_path).rebase(test_repo)) From f97577352bcfee8aa1838768baaa65b93c781778 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 5 Sep 2024 01:16:46 +0300 Subject: [PATCH 96/97] Add pdf2htmlEX workaround to screenshotter in html_render_diff --- test/scripts/html_render_diff.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/test/scripts/html_render_diff.py b/test/scripts/html_render_diff.py index 5a601263..a945e78f 100755 --- a/test/scripts/html_render_diff.py +++ b/test/scripts/html_render_diff.py @@ -5,8 +5,13 @@ import sys import argparse import io +import time + from PIL import Image, ImageChops from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions +from selenium.webdriver.support.ui import WebDriverWait import pathlib @@ -19,8 +24,25 @@ def to_url(something): def screenshot(browser, url): browser.get(url) - body = browser.find_element('tag name', 'body') - png = body.screenshot_as_png + target_find_by = By.TAG_NAME + target = 'body' + loaded_page_settling_time = 0 + + # Selenium doesn't like when we try to screenshot element of documents generated by pdf2htmlEX + if 'output-pdf2htmlEX' in url: + target_find_by = By.ID + target = 'page-container' + loaded_page_settling_time = 1 + + 
web_driver_wait = WebDriverWait(browser, 5) + web_driver_wait.until(expected_conditions.presence_of_element_located((target_find_by, target))) + web_driver_wait.until(lambda driver: driver.execute_script("return document.readyState") == "complete") + if loaded_page_settling_time != 0: + time.sleep(loaded_page_settling_time) + + target_element = browser.find_element(target_find_by, target) + + png = target_element.screenshot_as_png return Image.open(io.BytesIO(png)) From b945cbe2143d878c3f6a1f789db7b4234800fce0 Mon Sep 17 00:00:00 2001 From: Vilius Sutkus '89 Date: Thu, 5 Sep 2024 04:13:32 +0300 Subject: [PATCH 97/97] Update PDF2HTMLEX_DATA_DIR, POPPLER_DATA_DIR, FONTCONFIG_PATH and WVDATADIR setting mechanism --- conanfile.py | 2 +- src/odr/internal/html/pdf2htmlEX_wrapper.cpp | 34 ++++++++------------ src/odr/internal/html/wvWare_wrapper.cpp | 12 ++----- 3 files changed, 17 insertions(+), 31 deletions(-) diff --git a/conanfile.py b/conanfile.py index 067af3a4..54ad2160 100644 --- a/conanfile.py +++ b/conanfile.py @@ -38,7 +38,7 @@ def requirements(self): self.requires("uchardet/0.0.8") self.requires("utfcpp/4.0.4") if self.options.get_safe("with_pdf2htmlEX"): - self.requires("pdf2htmlex/0.18.8.rc1-20240814-git") + self.requires("pdf2htmlex/0.18.8.rc1-20240905-git") if self.options.get_safe("with_wvWare"): self.requires("wvware/1.2.9") diff --git a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp index c85a9052..169821f8 100644 --- a/src/odr/internal/html/pdf2htmlEX_wrapper.cpp +++ b/src/odr/internal/html/pdf2htmlEX_wrapper.cpp @@ -8,37 +8,29 @@ #include +#include #include namespace odr::internal { -static void ensure_env_vars() { - static const char *pdf2htmlEX_data_dir = getenv("PDF2HTMLEX_DATA_DIR"); - if (nullptr == pdf2htmlEX_data_dir) { - pdf2htmlEX_data_dir = PDF2HTMLEX_DATA_DIR; - setenv("PDF2HTMLEX_DATA_DIR", pdf2htmlEX_data_dir, 0); - } - - static const char *poppler_data_dir = getenv("POPPLER_DATA_DIR"); - if 
(nullptr == poppler_data_dir) { - poppler_data_dir = POPPLER_DATA_DIR; - setenv("POPPLER_DATA_DIR", poppler_data_dir, 0); - } - - static const char *fontconfig_path = getenv("FONTCONFIG_PATH"); - if (nullptr == fontconfig_path) { - fontconfig_path = FONTCONFIG_PATH; - setenv("FONTCONFIG_PATH", fontconfig_path, 0); - } -} - Html html::pdf2htmlEX_wrapper(const std::string &input_path, const std::string &output_path, const HtmlConfig &config, std::optional<std::string> &password) { - ensure_env_vars(); + static const char *fontconfig_path = getenv("FONTCONFIG_PATH"); + if (nullptr == fontconfig_path) { + // Storage is allocated and after successful putenv, it will never be freed. + // This is the way of putenv. + char *storage = strdup("FONTCONFIG_PATH=" FONTCONFIG_PATH); + if (0 != putenv(storage)) { + free(storage); + } + fontconfig_path = getenv("FONTCONFIG_PATH"); + } pdf2htmlEX::pdf2htmlEX pdf2htmlEX; + pdf2htmlEX.setDataDir(PDF2HTMLEX_DATA_DIR); + pdf2htmlEX.setPopplerDataDir(POPPLER_DATA_DIR); pdf2htmlEX.setInputFilename(input_path); pdf2htmlEX.setDestinationDir(output_path); diff --git a/src/odr/internal/html/wvWare_wrapper.cpp b/src/odr/internal/html/wvWare_wrapper.cpp index 6093fd68..1908836f 100644 --- a/src/odr/internal/html/wvWare_wrapper.cpp +++ b/src/odr/internal/html/wvWare_wrapper.cpp @@ -9,18 +9,12 @@ namespace odr::internal::html { -static void ensure_env_vars() { - static const char *wvdatadir = getenv("WVDATADIR"); - if (nullptr == wvdatadir) { - wvdatadir = WVDATADIR; - setenv("WVDATADIR", wvdatadir, 0); - } -} - Html wvWare_wrapper(const std::string &input_path, const std::string &output_path, const HtmlConfig &config, std::optional<std::string> &password) { - ensure_env_vars(); + if (nullptr == g_wvDataDir) { + g_wvDataDir = WVDATADIR; + } auto output_file_path = output_path + "/document.html";