Skip to content

Commit

Permalink
Build with pdf2htmlEX (#380)
Browse files Browse the repository at this point in the history
  • Loading branch information
ViliusSutkus89 authored Sep 14, 2024
1 parent c975dd9 commit f1e1007
Show file tree
Hide file tree
Showing 18 changed files with 456 additions and 22 deletions.
50 changes: 50 additions & 0 deletions .github/workflows/build_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,14 @@ jobs:
build/test/odr_test
build/test/Release/odr_test.exe
- name: Artifact .conan2/p dir
uses: actions/upload-artifact@v4
with:
name: conan2-${{ matrix.os }}-${{ matrix.compiler }}
path: ~/.conan2/p
if-no-files-found: error
compression-level: 0

docker:
needs: build
runs-on: ${{ matrix.os }}
Expand Down Expand Up @@ -197,6 +205,12 @@ jobs:
name: bin-${{ matrix.os }}-${{ matrix.compiler }}
path: .

- name: Download .conan2/p dir
uses: actions/download-artifact@v4
with:
name: conan2-${{ matrix.os }}-${{ matrix.compiler }}
path: ~/.conan2/p

- name: fix artifact permissions
if: runner.os != 'Windows'
run: chmod +x build/test/odr_test
Expand Down Expand Up @@ -233,6 +247,42 @@ jobs:
test/data/reference-output/odr-private/output \
build/test/output/odr-private/output
- name: tidy pdf2htmlEX test outputs
if: runner.os == 'Linux'
run: |
python3 -u test/scripts/tidy_output.py build/test/output/odr-public/output-pdf2htmlEX
python3 -u test/scripts/tidy_output.py build/test/output/odr-private/output-pdf2htmlEX
- name: Compare pdf2htmlEX public test results
if: runner.os == 'Linux'
run: |
python3 -u test/scripts/compare_output.py \
--driver firefox \
--max-workers 1 \
test/data/reference-output/odr-public/output-pdf2htmlEX \
build/test/output/odr-public/output-pdf2htmlEX
# Compare the pdf2htmlEX output of the private test set against its reference
# output (the public set is handled by the preceding step).
- name: Compare pdf2htmlEX private test results
  if: runner.os == 'Linux'
  run: |
    python3 -u test/scripts/compare_output.py \
      --driver firefox \
      --max-workers 1 \
      test/data/reference-output/odr-private/output-pdf2htmlEX \
      build/test/output/odr-private/output-pdf2htmlEX
# wvWare has no private test data
- name: tidy wvWare test outputs
if: runner.os == 'Linux'
run: |
python3 -u test/scripts/tidy_output.py build/test/output/odr-public/output-wvWare
- name: Compare wvWare public test results
if: runner.os == 'Linux'
run: |
python3 -u test/scripts/compare_output.py \
--driver firefox \
--max-workers 1 \
test/data/reference-output/odr-public/output-wvWare \
build/test/output/odr-public/output-wvWare
build-test-downstream:
runs-on: ${{ matrix.os }}
strategy:
Expand Down
20 changes: 18 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@ set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

option(BUILD_SHARED_LIBS "Build using shared libraries" ON)
option(ODR_TEST "enable tests" OFF)
option(ODR_CLI "enable command line interface" ON)
option(ODR_CLANG_TIDY "Run clang-tidy static analysis" OFF)
option(WITH_PDF2HTMLEX "Build with pdf2htmlEX" ON)
option(WITH_WVWARE "Build with wvWare" ON)

# TODO defining global compiler flags seems to be bad practice with conan
# TODO consider using conan profiles
Expand Down Expand Up @@ -37,6 +40,7 @@ find_package(uchardet REQUIRED)
find_package(utf8cpp REQUIRED)

configure_file("src/odr/internal/project_info.cpp.in" "src/odr/internal/project_info.cpp")
configure_file("src/odr/internal/project_info.hpp.in" "src/odr/internal/project_info.hpp")

set(PRE_CONFIGURE_FILE "src/odr/internal/git_info.cpp.in")
set(POST_CONFIGURE_FILE "${CMAKE_CURRENT_BINARY_DIR}/src/odr/internal/git_info.cpp")
Expand Down Expand Up @@ -177,6 +181,7 @@ set_target_properties(odr PROPERTIES OUTPUT_NAME odr)
target_include_directories(odr
PUBLIC
src
${CMAKE_CURRENT_BINARY_DIR}/src
)
target_link_libraries(odr
PRIVATE
Expand All @@ -189,6 +194,17 @@ target_link_libraries(odr
utf8::cpp
)

# Optional pdf2htmlEX backend (PDF -> HTML).
if(WITH_PDF2HTMLEX)
  # Resolve the dependency first so a missing package fails before the target
  # is modified.
  find_package(pdf2htmlEX REQUIRED)
  # pdf2htmlEX_wrapper.cpp uses these names as preprocessor macros; the
  # configured project_info.hpp only defines them when the CMake variables of
  # the same name are set (e.g. by the Conan toolchain).
  foreach(odr_required_var IN ITEMS PDF2HTMLEX_DATA_DIR POPPLER_DATA_DIR FONTCONFIG_PATH)
    if(NOT ${odr_required_var})
      message(WARNING
        "WITH_PDF2HTMLEX is ON but ${odr_required_var} is not set; "
        "pdf2htmlEX_wrapper.cpp will not compile without it")
    endif()
  endforeach()
  target_sources(odr PRIVATE "src/odr/internal/html/pdf2htmlEX_wrapper.cpp")
  target_link_libraries(odr PRIVATE pdf2htmlex::pdf2htmlex)
endif()

# Optional wvWare backend (legacy Word documents -> HTML).
if(WITH_WVWARE)
  find_package(wvware REQUIRED)
  # wvWare_wrapper.cpp uses WVDATADIR as a preprocessor macro, see above.
  if(NOT WVDATADIR)
    message(WARNING
      "WITH_WVWARE is ON but WVDATADIR is not set; "
      "wvWare_wrapper.cpp will not compile without it")
  endif()
  target_sources(odr PRIVATE "src/odr/internal/html/wvWare_wrapper.cpp")
  target_link_libraries(odr PRIVATE wvware::wvware)
endif()

if (EXISTS "${PROJECT_SOURCE_DIR}/.git")
add_dependencies(odr check_git)
endif ()
Expand All @@ -206,8 +222,8 @@ if (ODR_CLANG_TIDY)
endif ()

install(
DIRECTORY src/
DESTINATION include/
DIRECTORY src/ ${CMAKE_CURRENT_BINARY_DIR}/src/
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
FILES_MATCHING PATTERN "*.hpp"
)
install(
Expand Down
32 changes: 25 additions & 7 deletions conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from conan import ConanFile
from conan.tools.build import check_min_cppstd
from conan.tools.cmake import CMakeToolchain, CMakeDeps, CMake
from conan.tools.env import Environment
from conan.tools.env.environment import EnvVars
from conan.tools.files import copy


Expand All @@ -19,6 +21,8 @@ class OpenDocumentCoreConan(ConanFile):
options = {
"shared": [True, False],
"fPIC": [True, False],
"with_pdf2htmlEX": [True, False],
"with_wvWare": [True, False],
}
default_options = {
"shared": False,
Expand All @@ -33,6 +37,10 @@ def requirements(self):
self.requires("vincentlaucsb-csv-parser/2.3.0")
self.requires("uchardet/0.0.8")
self.requires("utfcpp/4.0.4")
if self.options.get_safe("with_pdf2htmlEX"):
self.requires("pdf2htmlex/0.18.8.rc1-20240905-git")
if self.options.get_safe("with_wvWare"):
self.requires("wvware/1.2.9")

def build_requirements(self):
self.test_requires("gtest/1.14.0")
Expand All @@ -47,6 +55,9 @@ def config_options(self):
if self.settings.os == "Windows":
del self.options.fPIC

self.options.with_pdf2htmlEX = self.settings.os not in ["Windows", "Macos"]
self.options.with_wvWare = self.settings.os not in ["Windows", "Macos"]

def configure(self):
if self.options.shared:
self.options.rm_safe("fPIC")
Expand All @@ -55,6 +66,20 @@ def generate(self):
tc = CMakeToolchain(self)
tc.variables["CMAKE_PROJECT_VERSION"] = self.version
tc.variables["ODR_TEST"] = False
tc.variables["WITH_PDF2HTMLEX"] = self.options.get_safe("with_pdf2htmlEX", False)
tc.variables["WITH_WVWARE"] = self.options.get_safe("with_wvWare", False)

# Get runenv info, exported by package_info() of dependencies
# We need to obtain PDF2HTMLEX_DATA_DIR, POPPLER_DATA_DIR, FONTCONFIG_PATH and WVDATADIR
runenv_info = Environment()
deps = self.dependencies.host.topological_sort
deps = [dep for dep in reversed(deps.values())]
for dep in deps:
runenv_info.compose_env(dep.runenv_info)
envvars = runenv_info.vars(self)
for v in ["PDF2HTMLEX_DATA_DIR", "POPPLER_DATA_DIR", "FONTCONFIG_PATH", "WVDATADIR"]:
tc.variables[v] = envvars.get(v)

tc.generate()

deps = CMakeDeps(self)
Expand All @@ -66,13 +91,6 @@ def build(self):
cmake.build()

def package(self):
copy(
self,
"*.hpp",
src=os.path.join(self.recipe_folder, "src"),
dst=os.path.join(self.export_sources_folder, "include"),
)

cmake = CMake(self)
cmake.install()

Expand Down
64 changes: 64 additions & 0 deletions src/odr/internal/html/pdf2htmlEX_wrapper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#include <odr/internal/html/pdf2htmlEX_wrapper.hpp>

#include <odr/exceptions.hpp>
#include <odr/file.hpp>
#include <odr/html.hpp>

#include <odr/internal/common/file.hpp>
#include <odr/internal/project_info.hpp>

#include <pdf2htmlEX.h>

#include <cstdlib>
#include <cstring>
#include <stdexcept>
#include <string>

namespace odr::internal {

/// Converts a PDF file to HTML using pdf2htmlEX.
///
/// The result is written as `document.html` into `output_path`.
///
/// @param input_path  path of the PDF file handed to pdf2htmlEX
/// @param output_path destination directory handed to pdf2htmlEX
/// @param config      HTML configuration stored in the returned `Html`
/// @param password    optional password; when set it is used as both the
///                    owner and the user password
/// @return `Html` with a single page named "document" pointing at the
///         generated file
/// @throws WrongPassword      if pdf2htmlEX reports an encryption password
///                            error
/// @throws std::runtime_error if the document is copy protected or the
///                            conversion fails
Html html::pdf2htmlEX_wrapper(const std::string &input_path,
                              const std::string &output_path,
                              const HtmlConfig &config,
                              std::optional<std::string> &password) {
  // Fall back to the build-time FONTCONFIG_PATH when the environment does not
  // provide one. `overwrite == 0` keeps a value that is already set, and
  // setenv copies its arguments, so nothing has to be allocated or leaked
  // here. A failed attempt is retried on the next call.
  static bool fontconfig_path_ready = false;
  if (!fontconfig_path_ready) {
    fontconfig_path_ready =
        (0 == setenv("FONTCONFIG_PATH", FONTCONFIG_PATH, 0));
  }

  pdf2htmlEX::pdf2htmlEX converter;
  converter.setDataDir(PDF2HTMLEX_DATA_DIR);
  converter.setPopplerDataDir(POPPLER_DATA_DIR);

  converter.setInputFilename(input_path);
  converter.setDestinationDir(output_path);
  auto output_file_name = "document.html";
  converter.setOutputFilename(output_file_name);

  converter.setDRM(false);
  converter.setProcessOutline(false);
  converter.setProcessAnnotation(true);

  if (password.has_value()) {
    converter.setOwnerPassword(password.value());
    converter.setUserPassword(password.value());
  }

  // Translate pdf2htmlEX exceptions into the exceptions used by this library.
  try {
    converter.convert();
  } catch (const pdf2htmlEX::EncryptionPasswordException &) {
    throw WrongPassword();
  } catch (const pdf2htmlEX::DocumentCopyProtectedException &) {
    throw std::runtime_error("document is copy protected");
  } catch (const pdf2htmlEX::ConversionFailedException &e) {
    throw std::runtime_error(std::string("conversion error ") + e.what());
  }

  return {FileType::portable_document_format,
          config,
          {{"document", output_path + "/" + output_file_name}}};
}

} // namespace odr::internal
23 changes: 23 additions & 0 deletions src/odr/internal/html/pdf2htmlEX_wrapper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#ifndef ODR_INTERNAL_PDF2HTMLEX_WRAPPER_HPP
#define ODR_INTERNAL_PDF2HTMLEX_WRAPPER_HPP

#include <optional>
#include <string>

// Forward declarations so this header does not need the full public headers.
namespace odr {
class PdfFile;

struct HtmlConfig;
class Html;
} // namespace odr

namespace odr::internal::html {

/// Converts the PDF at `input_path` to HTML using pdf2htmlEX.
///
/// The output is written as `document.html` into `output_path`.
///
/// @param input_path  path of the PDF file to convert
/// @param output_path destination directory for the generated HTML
/// @param config      HTML configuration stored in the returned `Html`
/// @param password    optional password; when set it is used as both the
///                    owner and the user password
/// @return `Html` with a single page named "document"
/// @throws WrongPassword      if pdf2htmlEX reports an encryption password
///                            error
/// @throws std::runtime_error if the document is copy protected or the
///                            conversion fails
Html pdf2htmlEX_wrapper(const std::string &input_path,
const std::string &output_path,
const HtmlConfig &config,
std::optional<std::string> &password);

} // namespace odr::internal::html

#endif // ODR_INTERNAL_PDF2HTMLEX_WRAPPER_HPP
52 changes: 52 additions & 0 deletions src/odr/internal/html/wvWare_wrapper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#include <odr/exceptions.hpp>
#include <odr/file.hpp>
#include <odr/html.hpp>
#include <odr/internal/common/file.hpp>
#include <odr/internal/html/wvWare_wrapper.hpp>
#include <odr/internal/project_info.hpp>
#include <unistd.h>
#include <wv/wv.h>

namespace odr::internal::html {

/// Converts a legacy Word document to HTML using wvWare.
///
/// The result is written to `output_path + "/document.html"`. wvWare writes
/// through the global `g_htmlOutputFileHandle`, which is opened before and
/// closed and reset after the conversion.
///
/// @param input_path  path of the document handed to wvWare
/// @param output_path output directory handed to wvWare
/// @param config      HTML configuration stored in the returned `Html`
/// @param password    optional password; an empty string is passed to wvWare
///                    when it is not set
/// @return `Html` with a single page named "document"
/// @throws WrongPassword      if wvWare returns 100 (password required) or
///                            101 (wrong password)
/// @throws std::runtime_error if the output file cannot be opened or wvWare
///                            returns any other non-zero code
Html wvWare_wrapper(const std::string &input_path,
                    const std::string &output_path, const HtmlConfig &config,
                    std::optional<std::string> &password) {
  // Use the build-time data directory unless one has already been set.
  if (nullptr == g_wvDataDir) {
    g_wvDataDir = WVDATADIR;
  }

  auto output_file_path = output_path + "/document.html";

  // Owned, mutable copies for the C API; they are released automatically on
  // every exit path, including exceptions.
  std::string input_file_path = input_path;
  std::string output_dir = output_path;
  std::string pw = password.value_or("");

  g_htmlOutputFileHandle = fopen(output_file_path.c_str(), "w");
  if (nullptr == g_htmlOutputFileHandle) {
    // Without this check wvWare would write to, and fclose would be called
    // on, a null handle.
    throw std::runtime_error("Conversion error: could not open output file " +
                             output_file_path);
  }

  int retVal =
      wvHtml_convert(input_file_path.data(), output_dir.data(), pw.c_str());
  fclose(g_htmlOutputFileHandle);
  g_htmlOutputFileHandle = nullptr;

  if (0 != retVal) {
    // Do not leave a partial output file behind.
    unlink(output_file_path.c_str());

    switch (retVal) {
    case 100: // PasswordRequired
    case 101: // Wrong Password
      throw WrongPassword();
    default:
      throw std::runtime_error("Conversion error");
    }
  }

  return {
      FileType::legacy_word_document, config, {{"document", output_file_path}}};
}

} // namespace odr::internal::html
22 changes: 22 additions & 0 deletions src/odr/internal/html/wvWare_wrapper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef ODR_INTERNAL_WVWARE_WRAPPER_HPP
#define ODR_INTERNAL_WVWARE_WRAPPER_HPP

#include <optional>
#include <string>

// Forward declarations so this header does not need the full public headers.
namespace odr {
class File;

struct HtmlConfig;
class Html;
} // namespace odr

namespace odr::internal::html {

/// Converts the legacy Word document at `input_path` to HTML using wvWare.
///
/// The output is written as `document.html` into `output_path`.
///
/// @param input_path  path of the document to convert
/// @param output_path output directory for the generated HTML
/// @param config      HTML configuration stored in the returned `Html`
/// @param password    optional password; an empty string is passed to wvWare
///                    when it is not set
/// @return `Html` with a single page named "document"
/// @throws WrongPassword      if wvWare reports that a password is required
///                            or wrong
/// @throws std::runtime_error if the conversion fails for another reason
Html wvWare_wrapper(const std::string &input_path,
const std::string &output_path, const HtmlConfig &config,
std::optional<std::string> &password);

} // namespace odr::internal::html

#endif // ODR_INTERNAL_WVWARE_WRAPPER_HPP
8 changes: 0 additions & 8 deletions src/odr/internal/project_info.hpp

This file was deleted.

15 changes: 15 additions & 0 deletions src/odr/internal/project_info.hpp.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#ifndef ODR_INTERNAL_PROJECT_INFO_HPP
#define ODR_INTERNAL_PROJECT_INFO_HPP

// Template for project_info.hpp; CMake's configure_file fills in the
// cmakedefine lines below when the build is configured.

namespace odr::internal::project_info {
// Returns the project version string.
const char *version() noexcept;
} // namespace odr::internal::project_info

// Each macro below is only defined when the CMake variable of the same name
// is set to a non-false value at configure time.

// pdf2htmlEX backend: feature switch plus the data directories used by
// pdf2htmlEX_wrapper.cpp.
#cmakedefine WITH_PDF2HTMLEX 1
#cmakedefine PDF2HTMLEX_DATA_DIR "@PDF2HTMLEX_DATA_DIR@"
#cmakedefine POPPLER_DATA_DIR "@POPPLER_DATA_DIR@"
#cmakedefine FONTCONFIG_PATH "@FONTCONFIG_PATH@"
// wvWare backend: feature switch plus the data directory used by
// wvWare_wrapper.cpp.
#cmakedefine WITH_WVWARE 1
#cmakedefine WVDATADIR "@WVDATADIR@"

#endif // ODR_INTERNAL_PROJECT_INFO_HPP
Loading

0 comments on commit f1e1007

Please sign in to comment.