From ae9d758755a337dacc582c5e15433d1627f66e0f Mon Sep 17 00:00:00 2001 From: "David S. Batista" Date: Thu, 31 Oct 2024 14:38:19 +0100 Subject: [PATCH] Update haystack_experimental/components/extractors/llm_metadata_extractor.py Co-authored-by: Sebastian Husch Lee --- .../components/extractors/llm_metadata_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack_experimental/components/extractors/llm_metadata_extractor.py b/haystack_experimental/components/extractors/llm_metadata_extractor.py index 7a1c6a5..13efbb6 100644 --- a/haystack_experimental/components/extractors/llm_metadata_extractor.py +++ b/haystack_experimental/components/extractors/llm_metadata_extractor.py @@ -321,7 +321,7 @@ def run(self, documents: List[Document], page_range: Optional[List[Union[str, in splitter = DocumentSplitter(split_by="page", split_length=1) pages = splitter.run(documents=[document]) - content = [p.content + "\n" for idx, p in enumerate(pages["documents"]) if idx in self.expanded_range] + content = [p.content + "\f" for idx, p in enumerate(pages["documents"]) if (idx + 1) in self.expanded_range] self._extract_metadata_and_update_doc(document, errors, "".join(content))