diff --git a/pkgs/development/python-modules/docling-parse/default.nix b/pkgs/development/python-modules/docling-parse/default.nix
new file mode 100644
index 0000000000000..b7fed63a2318d
--- /dev/null
+++ b/pkgs/development/python-modules/docling-parse/default.nix
@@ -0,0 +1,92 @@
+{
+  lib,
+  buildPythonPackage,
+  fetchFromGitHub,
+  cmake,
+  pkg-config,
+  cxxopts,
+  poetry-core,
+  pybind11,
+  tabulate,
+  zlib,
+  nlohmann_json,
+  utf8cpp,
+  libjpeg,
+  qpdf,
+  loguru-cpp,
+  pytestCheckHook,
+}:
+
+buildPythonPackage rec {
+  pname = "docling-parse";
+  version = "2.0.2";
+  pyproject = true;
+
+  src = fetchFromGitHub {
+    owner = "DS4SD";
+    repo = "docling-parse";
+    rev = "v${version}";
+    hash = "sha256-unXGmMp5xyRCqSoFmqcQAZOBzpE0EzgEEBIfZUHhRcQ=";
+  };
+
+  # Skip the cmake hook's configure phase; the package is built through its
+  # pyproject backend (pyproject = true).
+  dontUseCmakeConfigure = true;
+
+  # Native (non-PEP 517) build tools belong in nativeBuildInputs rather than
+  # build-system.
+  nativeBuildInputs = [
+    cmake
+    pkg-config
+  ];
+
+  build-system = [
+    poetry-core
+    pybind11
+  ];
+
+  buildInputs = [
+    cxxopts
+    libjpeg
+    loguru-cpp
+    nlohmann_json
+    qpdf
+    utf8cpp
+    zlib
+  ];
+
+  env = {
+    # Put utf8cpp's include/utf8cpp subdirectory on the C++ include path.
+    CXXFLAGS = "-I${lib.getDev utf8cpp}/include/utf8cpp";
+    # presumably read by the upstream build to use the buildInputs above
+    # instead of vendored copies — verify against upstream.
+    USE_SYSTEM_DEPS = true;
+  };
+
+  # NOTE(review): cmakeFlags is consumed by the cmake configure phase, which
+  # dontUseCmakeConfigure disables, so this is likely inert — confirm and
+  # drop if so.
+  cmakeFlags = [
+    "-DUSE_SYSTEM_DEPS=True"
+  ];
+
+  dependencies = [
+    tabulate
+  ];
+
+  pythonImportsCheck = [
+    "docling_parse"
+  ];
+
+  nativeCheckInputs = [
+    pytestCheckHook
+  ];
+
+  meta = {
+    changelog = "https://github.com/DS4SD/docling-parse/blob/${src.rev}/CHANGELOG.md";
+    description = "Simple package to extract text with coordinates from programmatic PDFs";
+    homepage = "https://github.com/DS4SD/docling-parse";
+    license = lib.licenses.mit;
+    maintainers = with lib.maintainers; [ drupol ];
+  };
+}
diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix
index a9ba637bc5daa..42d5a3b7391ef 100644
--- a/pkgs/top-level/python-packages.nix
+++ b/pkgs/top-level/python-packages.nix
@@ -3680,6 +3680,10 @@ self: super: with self; {
 
   dockerspawner = callPackage ../development/python-modules/dockerspawner { };
 
+  docling-parse = callPackage ../development/python-modules/docling-parse {
+    loguru-cpp = pkgs.loguru;
+  };
+
   docloud = callPackage ../development/python-modules/docloud { };
 
   docstr-coverage = callPackage ../development/python-modules/docstr-coverage { };