diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index af27c3c..d12d862 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -1914,6 +1914,7 @@ def save_as_markdown( indent: int = 4, text_width: int = -1, page_no: Optional[int] = None, + include_picture_contents: bool = False, ): """Save to markdown.""" artifacts_dir, reference_path = self._get_output_paths(filename, artifacts_dir) @@ -1936,6 +1937,7 @@ def save_as_markdown( indent=indent, text_width=text_width, page_no=page_no, + include_picture_contents=include_picture_contents, ) with open(filename, "w") as fw: @@ -1953,6 +1955,7 @@ def export_to_markdown( # noqa: C901 indent: int = 4, text_width: int = -1, page_no: Optional[int] = None, + include_picture_contents: bool = False, ) -> str: r"""Serialize to Markdown. @@ -1991,7 +1994,12 @@ def export_to_markdown( # noqa: C901 in_list = False # Track if we're currently processing list items for ix, (item, level) in enumerate( - self.iterate_items(self.body, with_groups=True, page_no=page_no) + self.iterate_items( + self.body, + with_groups=True, + page_no=page_no, + traverse_pictures=include_picture_contents, + ) ): # If we've moved to a lower level, we're exiting one or more groups if level < previous_level: @@ -2174,6 +2182,7 @@ def save_as_html( page_no: Optional[int] = None, html_lang: str = "en", html_head: str = _HTML_DEFAULT_HEAD, + include_picture_contents: bool = False, ): """Save to HTML.""" artifacts_dir, reference_path = self._get_output_paths(filename, artifacts_dir) @@ -2193,6 +2202,7 @@ def save_as_html( page_no=page_no, html_lang=html_lang, html_head=html_head, + include_picture_contents=include_picture_contents, ) with open(filename, "w") as fw: @@ -2239,6 +2249,7 @@ def export_to_html( # noqa: C901 page_no: Optional[int] = None, html_lang: str = "en", html_head: str = _HTML_DEFAULT_HEAD, + include_picture_contents: bool = False, ) -> str: r"""Serialize to HTML.""" @@ -2271,7 +2282,12 @@ def close_lists( in_ordered_list: List[bool] = [] # False for ix, (item, curr_level) in enumerate( - self.iterate_items(self.body, with_groups=True, page_no=page_no) + self.iterate_items( + self.body, + with_groups=True, + page_no=page_no, + traverse_pictures=include_picture_contents, + ) ): # If we've moved to a lower level, we're exiting one or more groups if curr_level < prev_level and len(in_ordered_list) > 0: @@ -2416,6 +2432,7 @@ def save_as_document_tokens( # specifics page_no: Optional[int] = None, with_groups: bool = True, + include_picture_contents: bool = False, ): r"""Save the document content to a DocumentToken format.""" out = self.export_to_document_tokens( @@ -2435,6 +2452,7 @@ def save_as_document_tokens( # specifics page_no=page_no, with_groups=with_groups, + include_picture_contents=include_picture_contents, ) with open(filename, "w") as fw: @@ -2459,6 +2477,7 @@ def export_to_document_tokens( page_no: Optional[int] = None, with_groups: bool = True, newline: bool = True, + include_picture_contents: bool = False, ) -> str: r"""Exports the document content to a DocumentToken format. @@ -2515,7 +2534,9 @@ def close_lists( result = f"{DocumentToken.BEG_DOCUMENT.value}{delim}" for ix, (item, curr_level) in enumerate( - self.iterate_items(self.body, with_groups=True) + self.iterate_items( + self.body, with_groups=True, traverse_pictures=include_picture_contents + ) ): # If we've moved to a lower level, we're exiting one or more groups