adithya-s-k · cahya-wirawan · Oct 20, 2024
diff --git a/marker_api/routes.py b/marker_api/routes.py
@@ -11,7 +11,7 @@
 
 
 # Function to parse PDF and return markdown, metadata, and image data
-def parse_pdf_and_return_markdown(pdf_file: bytes, extract_images: bool, model_list):
+def parse_pdf_and_return_markdown(pdf_file: bytes, extract_images: bool, model_list, **kwargs):
     """
     Function to parse a PDF and extract text and images.
 
@@ -23,7 +23,7 @@ def parse_pdf_and_return_markdown(pdf_file: bytes, extract_images: bool, model_l
     tuple: A tuple containing the full text, metadata, and image data (if extracted).
     """
     logger.debug("Parsing PDF file")
-    full_text, images, out_meta = convert_single_pdf(pdf_file, model_list)
+    full_text, images, out_meta = convert_single_pdf(pdf_file, model_list, **kwargs)
     logger.debug(f"Images extracted: {list(images.keys())}")
     image_data = {}
     if extract_images:
@@ -48,7 +48,7 @@ def parse_pdf_and_return_markdown(pdf_file: bytes, extract_images: bool, model_l
 
 
 # Function to process a single PDF file
-def process_pdf_file(file_content: bytes, filename: str, model_list):
+def process_pdf_file(file_content: bytes, filename: str, model_list, **kwargs):
     """
     Function to process a single PDF file.
 
@@ -63,7 +63,7 @@ def process_pdf_file(file_content: bytes, filename: str, model_list):
     entry_time = time.time()
     logger.info(f"Entry time for {filename}: {entry_time}")
     markdown_text, metadata, image_data = parse_pdf_and_return_markdown(
-        file_content, extract_images=True, model_list=model_list
+        file_content, extract_images=True, model_list=model_list, **kwargs
     )
     completion_time = time.time()
     logger.info(f"Model processes complete time for {filename}: {completion_time}")

diff --git a/server.py b/server.py
@@ -21,6 +21,7 @@
     ServerType,
 )
 from marker_api.demo import demo_ui
+from typing import Union
 
 # Initialize logging
 configure_logging()
@@ -65,13 +66,17 @@ def server():
 
 # Endpoint to convert a single PDF to markdown
 @app.post("/convert", response_model=ConversionResponse)
-async def convert_pdf_to_markdown(pdf_file: UploadFile):
+async def convert_pdf_to_markdown(pdf_file: UploadFile, max_pages: Union[int, None] = 10,
+                                  start_page: Union[int, None] = 0, langs: Union[str, None] = None,
+                                  batch_multiplier: Union[int, None] =  2):
     """
     Endpoint to convert a single PDF to markdown.
     """
     logger.debug(f"Received file: {pdf_file.filename}")
     file = await pdf_file.read()
-    response = process_pdf_file(file, pdf_file.filename, model_list)
+    response = process_pdf_file(file, pdf_file.filename, model_list,
+                                max_pages=max_pages, start_page=start_page,
+                                langs=langs, batch_multiplier=batch_multiplier)
     return ConversionResponse(status="Success", result=response)