mindee · felixdittrich92 · Sep 1, 2022 · mara004 · Sep 1, 2022 · felixdittrich92
diff --git a/doctr/io/pdf.py b/doctr/io/pdf.py
@@ -4,7 +4,7 @@
 # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
 
 from pathlib import Path
-from typing import Any, List, Optional
+from typing import Any, Iterator, Optional
 
 import numpy as np
 import pypdfium2 as pdfium
@@ -19,7 +19,7 @@ def read_pdf(
     scale: float = 2,
     password: Optional[str] = None,
     **kwargs: Any,
-) -> List[np.ndarray]:
+) -> Iterator[np.ndarray]:
     """Read a PDF file and convert it into an image in numpy format
 
     >>> from doctr.documents import read_pdf
@@ -42,4 +42,5 @@ def read_pdf(
 
     # Rasterise pages to PIL images with pypdfium2 and convert to numpy ndarrays
     with pdfium.PdfDocument(file, password=password) as pdf:
-        return [np.asarray(img) for img in pdf.render_topil(scale=scale, **kwargs)]
+        for img in pdf.render_topil(scale=scale, **kwargs):
+            yield np.asarray(img)
diff --git a/doctr/io/reader.py b/doctr/io/reader.py
@@ -34,7 +34,7 @@ def from_pdf(cls, file: AbstractFile, **kwargs) -> List[np.ndarray]:
             the list of pages decoded as numpy ndarray of shape H x W x 3
         """
 
-        return read_pdf(file, **kwargs)
+        return list(read_pdf(file, **kwargs))
 
     @classmethod
     def from_url(cls, url: str, **kwargs) -> List[np.ndarray]:

diff --git a/tests/common/test_io.py b/tests/common/test_io.py
@@ -15,20 +15,20 @@ def _check_doc_content(doc_tensors, num_pages):
 
 
 def test_read_pdf(mock_pdf):
-    doc = io.read_pdf(mock_pdf)
+    doc = list(io.read_pdf(mock_pdf))
     _check_doc_content(doc, 2)
 
     with open(mock_pdf, "rb") as f:
-        doc = io.read_pdf(f.read())
+        doc = list(io.read_pdf(f.read()))
     _check_doc_content(doc, 2)
 
     # Wrong input type
     with pytest.raises(TypeError):
-        _ = io.read_pdf(123)
+        _ = list(io.read_pdf(123))
 
     # Wrong path
     with pytest.raises(FileNotFoundError):
-        _ = io.read_pdf("my_imaginary_file.pdf")
+        _ = list(io.read_pdf("my_imaginary_file.pdf"))
 
 
 def test_read_img_as_numpy(tmpdir_factory, mock_pdf):