diff --git a/extract_thinker/document_loader/document_loader.py b/extract_thinker/document_loader/document_loader.py index 3eb877c..118c492 100644 --- a/extract_thinker/document_loader/document_loader.py +++ b/extract_thinker/document_loader/document_loader.py @@ -3,25 +3,36 @@ from io import BytesIO from PIL import Image import pypdfium2 as pdfium -from typing import Any, Dict, Union +from typing import Any, Dict, Union, List from cachetools import TTLCache import os import magic from extract_thinker.utils import get_file_extension, check_mime_type +from playwright.sync_api import sync_playwright +from urllib.parse import urlparse +import base64 +import math class DocumentLoader(ABC): - def __init__(self, content: Any = None, cache_ttl: int = 300): + # SUPPORTED_FORMATS = [ + # "pdf", "jpg", "jpeg", "png", "tiff", "bmp" + # ] + + def __init__(self, content: Any = None, cache_ttl: int = 300, screenshot_timeout: int = 1000): """Initialize loader. Args: content: Initial content cache_ttl: Cache time-to-live in seconds + screenshot_timeout: Timeout in milliseconds to wait for page content load when capturing a screenshot. """ self.content = content self.file_path = None self.cache = TTLCache(maxsize=100, ttl=cache_ttl) self.vision_mode = False self.max_image_size = None # Changed to None by default + self.is_url = False # Indicates if the source is a URL + self.screenshot_timeout = screenshot_timeout def set_max_image_size(self, size: int) -> None: """Set the maximum image size.""" @@ -31,6 +42,10 @@ def set_vision_mode(self, enabled: bool = True) -> None: """Enable or disable vision mode processing.""" self.vision_mode = enabled + def set_screenshot_timeout(self, timeout: int) -> None: + """Set the screenshot timeout in milliseconds for capturing a screenshot from a URL.""" + self.screenshot_timeout = timeout + def can_handle(self, source: Union[str, BytesIO]) -> bool: """ Checks if the loader can handle the given source. @@ -60,7 +75,6 @@ def _can_handle_file_path(self, file_path: str) -> bool: def _can_handle_stream(self, stream: BytesIO) -> bool: """Checks if the loader can handle the given BytesIO stream.""" try: - # Read the first few bytes to determine file type mime = magic.from_buffer(stream.getvalue(), mime=True) stream.seek(0) # Reset stream position return check_mime_type(mime, self.SUPPORTED_FORMATS) @@ -85,7 +99,26 @@ def convert_to_images(self, file: Union[str, io.BytesIO, io.BufferedReader], sca raise TypeError("file must be a file path (str) or a file-like stream") def _convert_file_to_images(self, file_path: str, scale: float) -> Dict[int, bytes]: - # Check if the file is already an image + """Convert file to images, handling both URLs and local files.""" + # Check if it's a URL + if self._is_url(file_path): + self.is_url = True # Set the instance variable if the source is a URL + try: + screenshot = self._capture_screenshot_from_url(file_path) + # Convert screenshot to PIL Image for potential resizing + img = Image.open(BytesIO(screenshot)) + img = self._resize_if_needed(img) + + # Split into vertical chunks + chunks = self._split_image_vertically(img) + + # Return dictionary with chunks as list + return {0: chunks} # All chunks from URL are considered "page 0" + + except Exception as e: + raise ValueError(f"Failed to capture screenshot from URL: {str(e)}") + + # Existing code for local files... 
try: Image.open(file_path) is_image = True @@ -93,11 +126,9 @@ def _convert_file_to_images(self, file_path: str, scale: float) -> Dict[int, byt is_image = False if is_image: - # If it is, return it as is with open(file_path, "rb") as f: return {0: f.read()} - # If it's not an image, proceed with the conversion return self._convert_pdf_to_images(pdfium.PdfDocument(file_path), scale) def _convert_stream_to_images(self, file_stream: io.BytesIO, scale: float) -> Dict[int, bytes]: @@ -163,13 +194,15 @@ def can_handle_vision(self, source: Union[str, BytesIO]) -> bool: Checks if the loader can handle the source in vision mode. Args: - source: Either a file path (str) or a BytesIO stream + source: Either a file path (str), URL, or a BytesIO stream Returns: bool: True if the loader can handle the source in vision mode """ try: if isinstance(source, str): + if self._is_url(source): + return True # URLs are always supported in vision mode ext = get_file_extension(source).lower() return ext in ['pdf', 'jpg', 'jpeg', 'png', 'tiff', 'bmp'] elif isinstance(source, BytesIO): @@ -210,4 +243,99 @@ def can_handle_paginate(self, source: Union[str, BytesIO]) -> bool: # List of extensions that support pagination return ext in ['pdf'] except Exception: - return False \ No newline at end of file + return False + + @staticmethod + def _check_playwright_dependencies(): + """ + Check if the playwright dependency is installed. + Raises: + ImportError: If playwright is not installed. + """ + try: + from playwright.sync_api import sync_playwright + except ImportError: + raise ImportError( + "You are using vision with a URL. You need to install playwright: " + "run `pip install playwright` and then `playwright install`." + ) + + def _capture_screenshot_from_url(self, url: str) -> bytes: + """ + Captures a full-page screenshot of a URL using Playwright. + + Args: + url: The URL to capture + + Returns: + bytes: The screenshot image data + """ + # Optional: Check if playwright is installed before attempting to use it. + self._check_playwright_dependencies() + + from playwright.sync_api import sync_playwright # Import after the dependency check + + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + page = browser.new_page() + + try: + # Navigate to URL + page.goto(url, wait_until='networkidle') + + # Optional: Handle cookie consent popups (customize selectors as needed) + try: + page.click('button:has-text("Accept")', timeout=10000) + except Exception: + pass # Ignore if no cookie banner is found + + # Wait for content to load with the configurable timeout + page.wait_for_timeout(self.screenshot_timeout) + + # Capture full page screenshot + screenshot = page.screenshot(full_page=True) + + return screenshot + + finally: + browser.close() + + def _split_image_vertically(self, img: Image.Image, chunk_height: int = 1000) -> List[bytes]: + """ + Splits a tall PIL Image into vertical chunks of `chunk_height`. + Returns a list of bytes in PNG format, in top-to-bottom order. 
+ + Args: + img: PIL Image to split + chunk_height: Height of each chunk in pixels + + Returns: + List of PNG-encoded bytes for each chunk + """ + width, height = img.size + num_chunks = math.ceil(height / chunk_height) + + chunks_bytes = [] + for i in range(num_chunks): + top = i * chunk_height + bottom = min((i + 1) * chunk_height, height) + crop_box = (0, top, width, bottom) + + # Crop the chunk + chunk_img = img.crop(crop_box) + + # Convert chunk to bytes + chunk_bytes = io.BytesIO() + chunk_img.save(chunk_bytes, format="PNG", optimize=True) + chunk_bytes.seek(0) + chunks_bytes.append(chunk_bytes.read()) + + return chunks_bytes + + def _is_url(self, source: str) -> bool: + """Check if the source string is a URL.""" + try: + result = urlparse(source) + return bool(result.scheme and result.netloc) + except: + return False \ No newline at end of file diff --git a/extract_thinker/document_loader/document_loader_beautiful_soup.py b/extract_thinker/document_loader/document_loader_beautiful_soup.py index 8d1d337..5784464 100644 --- a/extract_thinker/document_loader/document_loader_beautiful_soup.py +++ b/extract_thinker/document_loader/document_loader_beautiful_soup.py @@ -52,7 +52,9 @@ def __post_init__(self): class DocumentLoaderBeautifulSoup(CachedDocumentLoader): """Loader that uses BeautifulSoup4 to load HTML content.""" - SUPPORTED_FORMATS = ['html', 'htm'] + SUPPORTED_FORMATS = [ + 'html', 'htm', 'url' # Add URL support + ] def __init__( self, @@ -257,9 +259,7 @@ def load(self, source: Union[str, BytesIO]) -> List[Dict[str, Any]]: raise ValueError(f"Error loading HTML content: {str(e)}") def can_handle(self, source: Union[str, BytesIO]) -> bool: - """Check if the loader can handle this source.""" - if isinstance(source, BytesIO): + """Override to add URL support.""" + if isinstance(source, str) and self._is_url(source): return True - if self._is_url(source): - return True - return get_file_extension(source) in self.SUPPORTED_FORMATS \ No newline at end of file + return super().can_handle(source) \ No newline at end of file diff --git a/extract_thinker/document_loader/document_loader_docling.py b/extract_thinker/document_loader/document_loader_docling.py index 1ff2a1c..d47783a 100644 --- a/extract_thinker/document_loader/document_loader_docling.py +++ b/extract_thinker/document_loader/document_loader_docling.py @@ -1,6 +1,7 @@ from io import BytesIO from typing import Any, Dict, List, Union, Optional from dataclasses import dataclass, field +from urllib.parse import urlparse from cachetools import cachedmethod from cachetools.keys import hashkey @@ -120,7 +121,9 @@ class DocumentLoaderDocling(CachedDocumentLoader): # XML (including PubMed .nxml) "xml", "nxml", # Plain text - "txt" + "txt", + # URL support + "url" ] def __init__( @@ -212,6 +215,7 @@ def can_handle(self, source: Union[str, BytesIO]) -> bool: self.vision_mode )) def load(self, source: Union[str, BytesIO]) -> List[Dict[str, Any]]: + from docling.document_converter import ConversionResult """ Load and parse the document using Docling. 
@@ -219,30 +223,35 @@ def load(self, source: Union[str, BytesIO]) -> List[Dict[str, Any]]: A list of dictionaries, each representing a "page" with: - "content": text from that page - "image": optional image bytes if vision_mode is True - - "markdown": Markdown string of that page """ if not self.can_handle(source): raise ValueError(f"Cannot handle source: {source}") # Convert the source to a docling "ConversionResult" - conv_result = self._docling_convert(source) - - test = conv_result.document.export_to_markdown() - print(test) + conv_result: ConversionResult = self._docling_convert(source) - # Build the output list of page data + # If the source is a URL, return a single page with all the content. + if isinstance(source, str) and self._is_url(source): + content = conv_result.document.export_to_markdown() + print(content) # Log the exported markdown, if needed + page_output = {"content": content, "image": None} + # Handle image extraction if vision_mode is enabled + if self.vision_mode: + images_dict = self.convert_to_images(source) + page_output["images"] = images_dict.get(0) + return [page_output] + + # Build the output list of page data for non-URL sources pages_output = [] for p in conv_result.pages: page_dict = { "content": conv_result.document.export_to_markdown(page_no=p.page_no+1), "image": None } - # Handle image extraction if vision_mode is enabled if self.vision_mode: images_dict = self.convert_to_images(source) page_dict["image"] = images_dict.get(p.page_no) - pages_output.append(page_dict) # Fallback for documents without explicit pages diff --git a/extract_thinker/document_loader/document_loader_markitdown.py b/extract_thinker/document_loader/document_loader_markitdown.py index 3e401d7..4b8c157 100644 --- a/extract_thinker/document_loader/document_loader_markitdown.py +++ b/extract_thinker/document_loader/document_loader_markitdown.py @@ -53,13 +53,18 @@ class DocumentLoaderMarkItDown(CachedDocumentLoader): Supports text extraction and optional image/page rendering in vision mode. Produces a list of pages, each with: - "content": text from that page - - "image": optional page/image bytes if vision_mode is True + - "image": optional page/image bytes if vision_mode is True (for non-URL sources) + - For URL sources, returns a single page with: + - "content": extracted text + - "image": always None + - "images": rendered image bytes if vision_mode is enabled """ SUPPORTED_FORMATS = [ - "pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", + "pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "txt", "html", "xml", "json", "zip", - "jpg", "jpeg", "png", "bmp", "gif", "wav", "mp3", "m4a" + "jpg", "jpeg", "png", "bmp", "gif", "wav", "mp3", "m4a", + "url" ] def __init__( @@ -131,13 +136,30 @@ def _process_text(self, text: str) -> str: """Apply any additional text processing (e.g., strip whitespace).""" return text if self.config.preserve_whitespace else text.strip() - def _is_url(self, source: str) -> bool: + def _is_url(self, potential_url: str) -> bool: """Check if the source is a URL.""" + return potential_url.startswith("http://") or potential_url.startswith("https://") + + def can_handle(self, source: Union[str, BytesIO]) -> bool: + """ + Checks if the loader can handle the given source. 
+ + Args: + source: Either a file path (str), a BytesIO stream, or a URL + + Returns: + bool: True if the loader can handle the source, False otherwise + """ try: - from urllib.parse import urlparse - result = urlparse(source) - return all([result.scheme, result.netloc]) - except: + if isinstance(source, str): + if self._is_url(source): + return True + extension = source.split('.')[-1].lower() + return extension in self.SUPPORTED_FORMATS + elif isinstance(source, BytesIO): + return True + return False + except Exception: return False @cachedmethod(cache=attrgetter('cache'), @@ -154,9 +176,27 @@ def load(self, source: Union[str, BytesIO]) -> List[Dict[str, Any]]: Returns: A list of dictionaries where each dict is one "page" of text. - - "content": The text content (str) - - "image": Optional bytes if vision mode is enabled (key only present if vision_mode is True) + For non-URL sources: + - "content": The text content (str) + - "image": Optional bytes if vision mode is enabled (key only present if vision_mode is True) + For URL sources: + - "content": The text content (str) + - "image": Always None + - "images": Optional rendered image bytes if vision mode is enabled """ + # Handle URL sources separately + if isinstance(source, str) and self._is_url(source): + try: + result = self.markitdown.convert(source) + text_content = result.text_content or "" + page_output = {"content": text_content, "image": None} + if self.vision_mode: + images_dict = self.convert_to_images(source) + page_output["images"] = images_dict.get(0) + return [page_output] + except Exception as e: + raise ValueError(f"Error processing document with MarkItDown URL handling: {str(e)}") + if not self.can_handle(source): raise ValueError(f"Cannot handle source: {source}") @@ -170,7 +210,7 @@ def load(self, source: Union[str, BytesIO]) -> List[Dict[str, Any]]: # File path result = self.markitdown.convert(source) else: - # BytesIO + # BytesIO stream source.seek(0) if self.config.mime_type_detection: mime = magic.from_buffer(source.getvalue(), mime=True) @@ -189,13 +229,9 @@ def load(self, source: Union[str, BytesIO]) -> List[Dict[str, Any]]: source.seek(0) # Full text from MarkItDown - text_content = result.text_content - if not text_content: - text_content = "" - + text_content = result.text_content or "" # Split text content into pages (based on config.page_separator) raw_pages = text_content.split(self.config.page_separator) - pages = [] for page_text in raw_pages: processed = self._process_text(page_text) @@ -215,25 +251,4 @@ def load(self, source: Union[str, BytesIO]) -> List[Dict[str, Any]]: return pages except Exception as e: - raise ValueError(f"Error processing document with MarkItDown: {str(e)}") - - def can_handle(self, source: Union[str, BytesIO]) -> bool: - """ - Checks if the loader can handle the given source. 
- - Args: - source: Either a file path (str), a BytesIO stream, or a URL - - Returns: - bool: True if the loader can handle the source, False otherwise - """ - try: - if isinstance(source, str): - if self._is_url(source): - return True - return self._can_handle_file_path(source) - elif isinstance(source, BytesIO): - return self._can_handle_stream(source) - return False - except Exception: - return False \ No newline at end of file + raise ValueError(f"Error processing document with MarkItDown: {str(e)}") \ No newline at end of file diff --git a/extract_thinker/pagination_handler.py b/extract_thinker/pagination_handler.py index eb5b035..fbd2c81 100644 --- a/extract_thinker/pagination_handler.py +++ b/extract_thinker/pagination_handler.py @@ -164,7 +164,7 @@ def _merge_list_field(self, field_name: str, values: List[Any], field_type: Any) break if unique_key: - # Merge by unique key + # Merge by unique key using case-insensitive comparison merged_by_key = {} for item in flattened: if hasattr(item, 'model_dump'): @@ -173,13 +173,14 @@ def _merge_list_field(self, field_name: str, values: List[Any], field_type: Any) item_dict = item key_val = item_dict.get(unique_key) if key_val is not None: - if key_val in merged_by_key: - merged_by_key[key_val] = self._merge_two_models( - merged_by_key[key_val], + normalized_key = str(key_val).lower() + if normalized_key in merged_by_key: + merged_by_key[normalized_key] = self._merge_two_models( + merged_by_key[normalized_key], item_dict ) else: - merged_by_key[key_val] = item_dict + merged_by_key[normalized_key] = item_dict else: # If no unique key found for this item, just store it uniquely merged_by_key[f"no_key_{len(merged_by_key)}"] = item_dict diff --git a/poetry.lock b/poetry.lock index cd6f78f..8f68b2b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -802,6 +802,93 @@ test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe, test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"] tqdm = ["tqdm"] +[[package]] +name = "greenlet" +version = "3.1.1" +description = "Lightweight in-process concurrent programming" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "greenlet-3.1.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:0bbae94a29c9e5c7e4a2b7f0aae5c17e8e90acbfd3bf6270eeba60c39fce3563"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fde093fb93f35ca72a556cf72c92ea3ebfda3d79fc35bb19fbe685853869a83"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:36b89d13c49216cadb828db8dfa6ce86bbbc476a82d3a6c397f0efae0525bdd0"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94b6150a85e1b33b40b1464a3f9988dcc5251d6ed06842abff82e42632fac120"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93147c513fac16385d1036b7e5b102c7fbbdb163d556b791f0f11eada7ba65dc"}, + {file = 
"greenlet-3.1.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da7a9bff22ce038e19bf62c4dd1ec8391062878710ded0a845bcf47cc0200617"}, + {file = "greenlet-3.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b2795058c23988728eec1f36a4e5e4ebad22f8320c85f3587b539b9ac84128d7"}, + {file = "greenlet-3.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ed10eac5830befbdd0c32f83e8aa6288361597550ba669b04c48f0f9a2c843c6"}, + {file = "greenlet-3.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:77c386de38a60d1dfb8e55b8c1101d68c79dfdd25c7095d51fec2dd800892b80"}, + {file = "greenlet-3.1.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:e4d333e558953648ca09d64f13e6d8f0523fa705f51cae3f03b5983489958c70"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09fc016b73c94e98e29af67ab7b9a879c307c6731a2c9da0db5a7d9b7edd1159"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d5e975ca70269d66d17dd995dafc06f1b06e8cb1ec1e9ed54c1d1e4a7c4cf26e"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b2813dc3de8c1ee3f924e4d4227999285fd335d1bcc0d2be6dc3f1f6a318ec1"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e347b3bfcf985a05e8c0b7d462ba6f15b1ee1c909e2dcad795e49e91b152c383"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e8f8c9cb53cdac7ba9793c276acd90168f416b9ce36799b9b885790f8ad6c0a"}, + {file = "greenlet-3.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62ee94988d6b4722ce0028644418d93a52429e977d742ca2ccbe1c4f4a792511"}, + {file = "greenlet-3.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1776fd7f989fc6b8d8c8cb8da1f6b82c5814957264d1f6cf818d475ec2bf6395"}, + {file = "greenlet-3.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:48ca08c771c268a768087b408658e216133aecd835c0ded47ce955381105ba39"}, + {file = "greenlet-3.1.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:4afe7ea89de619adc868e087b4d2359282058479d7cfb94970adf4b55284574d"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f406b22b7c9a9b4f8aa9d2ab13d6ae0ac3e85c9a809bd590ad53fed2bf70dc79"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c3a701fe5a9695b238503ce5bbe8218e03c3bcccf7e204e455e7462d770268aa"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2846930c65b47d70b9d178e89c7e1a69c95c1f68ea5aa0a58646b7a96df12441"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99cfaa2110534e2cf3ba31a7abcac9d328d1d9f1b95beede58294a60348fba36"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1443279c19fca463fc33e65ef2a935a5b09bb90f978beab37729e1c3c6c25fe9"}, + {file = "greenlet-3.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b7cede291382a78f7bb5f04a529cb18e068dd29e0fb27376074b6d0317bf4dd0"}, + {file = "greenlet-3.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:23f20bb60ae298d7d8656c6ec6db134bca379ecefadb0b19ce6f19d1f232a942"}, + {file = "greenlet-3.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:7124e16b4c55d417577c2077be379514321916d5790fa287c9ed6f23bd2ffd01"}, + {file = "greenlet-3.1.1-cp313-cp313-macosx_11_0_universal2.whl", hash = 
"sha256:05175c27cb459dcfc05d026c4232f9de8913ed006d42713cb8a5137bd49375f1"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:935e943ec47c4afab8965954bf49bfa639c05d4ccf9ef6e924188f762145c0ff"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:667a9706c970cb552ede35aee17339a18e8f2a87a51fba2ed39ceeeb1004798a"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8a678974d1f3aa55f6cc34dc480169d58f2e6d8958895d68845fa4ab566509e"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efc0f674aa41b92da8c49e0346318c6075d734994c3c4e4430b1c3f853e498e4"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0153404a4bb921f0ff1abeb5ce8a5131da56b953eda6e14b88dc6bbc04d2049e"}, + {file = "greenlet-3.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:275f72decf9932639c1c6dd1013a1bc266438eb32710016a1c742df5da6e60a1"}, + {file = "greenlet-3.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c4aab7f6381f38a4b42f269057aee279ab0fc7bf2e929e3d4abfae97b682a12c"}, + {file = "greenlet-3.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:b42703b1cf69f2aa1df7d1030b9d77d3e584a70755674d60e710f0af570f3761"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1695e76146579f8c06c1509c7ce4dfe0706f49c6831a817ac04eebb2fd02011"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7876452af029456b3f3549b696bb36a06db7c90747740c5302f74a9e9fa14b13"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ead44c85f8ab905852d3de8d86f6f8baf77109f9da589cb4fa142bd3b57b475"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8320f64b777d00dd7ccdade271eaf0cad6636343293a25074cc5566160e4de7b"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6510bf84a6b643dabba74d3049ead221257603a253d0a9873f55f6a59a65f822"}, + {file = "greenlet-3.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:04b013dc07c96f83134b1e99888e7a79979f1a247e2a9f59697fa14b5862ed01"}, + {file = "greenlet-3.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:411f015496fec93c1c8cd4e5238da364e1da7a124bcb293f085bf2860c32c6f6"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47da355d8687fd65240c364c90a31569a133b7b60de111c255ef5b606f2ae291"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98884ecf2ffb7d7fe6bd517e8eb99d31ff7855a840fa6d0d63cd07c037f6a981"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1d4aeb8891338e60d1ab6127af1fe45def5259def8094b9c7e34690c8858803"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db32b5348615a04b82240cc67983cb315309e88d444a288934ee6ceaebcad6cc"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dcc62f31eae24de7f8dce72134c8651c58000d3b1868e01392baea7c32c247de"}, + {file = "greenlet-3.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1d3755bcb2e02de341c55b4fca7a745a24a9e7212ac953f6b3a48d117d7257aa"}, + {file = 
"greenlet-3.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:b8da394b34370874b4572676f36acabac172602abf054cbc4ac910219f3340af"}, + {file = "greenlet-3.1.1-cp37-cp37m-win32.whl", hash = "sha256:a0dfc6c143b519113354e780a50381508139b07d2177cb6ad6a08278ec655798"}, + {file = "greenlet-3.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:54558ea205654b50c438029505def3834e80f0869a70fb15b871c29b4575ddef"}, + {file = "greenlet-3.1.1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:346bed03fe47414091be4ad44786d1bd8bef0c3fcad6ed3dee074a032ab408a9"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfc59d69fc48664bc693842bd57acfdd490acafda1ab52c7836e3fc75c90a111"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d21e10da6ec19b457b82636209cbe2331ff4306b54d06fa04b7c138ba18c8a81"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:37b9de5a96111fc15418819ab4c4432e4f3c2ede61e660b1e33971eba26ef9ba"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ef9ea3f137e5711f0dbe5f9263e8c009b7069d8a1acea822bd5e9dae0ae49c8"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85f3ff71e2e60bd4b4932a043fbbe0f499e263c628390b285cb599154a3b03b1"}, + {file = "greenlet-3.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:95ffcf719966dd7c453f908e208e14cde192e09fde6c7186c8f1896ef778d8cd"}, + {file = "greenlet-3.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:03a088b9de532cbfe2ba2034b2b85e82df37874681e8c470d6fb2f8c04d7e4b7"}, + {file = "greenlet-3.1.1-cp38-cp38-win32.whl", hash = "sha256:8b8b36671f10ba80e159378df9c4f15c14098c4fd73a36b9ad715f057272fbef"}, + {file = "greenlet-3.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:7017b2be767b9d43cc31416aba48aab0d2309ee31b4dbf10a1d38fb7972bdf9d"}, + {file = "greenlet-3.1.1-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:396979749bd95f018296af156201d6211240e7a23090f50a8d5d18c370084dc3"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca9d0ff5ad43e785350894d97e13633a66e2b50000e8a183a50a88d834752d42"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f6ff3b14f2df4c41660a7dec01045a045653998784bf8cfcb5a525bdffffbc8f"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94ebba31df2aa506d7b14866fed00ac141a867e63143fe5bca82a8e503b36437"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73aaad12ac0ff500f62cebed98d8789198ea0e6f233421059fa68a5aa7220145"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63e4844797b975b9af3a3fb8f7866ff08775f5426925e1e0bbcfe7932059a12c"}, + {file = "greenlet-3.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7939aa3ca7d2a1593596e7ac6d59391ff30281ef280d8632fa03d81f7c5f955e"}, + {file = "greenlet-3.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d0028e725ee18175c6e422797c407874da24381ce0690d6b9396c204c7f7276e"}, + {file = "greenlet-3.1.1-cp39-cp39-win32.whl", hash = "sha256:5e06afd14cbaf9e00899fae69b24a32f2196c19de08fcb9f4779dd4f004e5e7c"}, + {file = "greenlet-3.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:3319aa75e0e0639bc15ff54ca327e8dc7a6fe404003496e3c6925cd3142e0e22"}, + {file = "greenlet-3.1.1.tar.gz", hash = 
"sha256:4ce3ac6cdb6adf7946475d7ef31777c26d94bccc377e070a7986bd2d5c515467"}, +] + +[package.extras] +docs = ["Sphinx", "furo"] +test = ["objgraph", "psutil"] + [[package]] name = "h11" version = "0.14.0" @@ -1744,6 +1831,27 @@ docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-a test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"] type = ["mypy (>=1.11.2)"] +[[package]] +name = "playwright" +version = "1.50.0" +description = "A high-level API to automate web browsers" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "playwright-1.50.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:f36d754a6c5bd9bf7f14e8f57a2aea6fd08f39ca4c8476481b9c83e299531148"}, + {file = "playwright-1.50.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:40f274384591dfd27f2b014596250b2250c843ed1f7f4ef5d2960ecb91b4961e"}, + {file = "playwright-1.50.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:9922ef9bcd316995f01e220acffd2d37a463b4ad10fd73e388add03841dfa230"}, + {file = "playwright-1.50.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:8fc628c492d12b13d1f347137b2ac6c04f98197ff0985ef0403a9a9ee0d39131"}, + {file = "playwright-1.50.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcff35f72db2689a79007aee78f1b0621a22e6e3d6c1f58aaa9ac805bf4497c"}, + {file = "playwright-1.50.0-py3-none-win32.whl", hash = "sha256:3b906f4d351260016a8c5cc1e003bb341651ae682f62213b50168ed581c7558a"}, + {file = "playwright-1.50.0-py3-none-win_amd64.whl", hash = "sha256:1859423da82de631704d5e3d88602d755462b0906824c1debe140979397d2e8d"}, +] + +[package.dependencies] +greenlet = ">=3.1.1,<4.0.0" +pyee = ">=12,<13" + [[package]] name = "pluggy" version = "1.5.0" @@ -2085,6 +2193,24 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +[[package]] +name = "pyee" +version = "12.1.1" +description = "A rough port of Node.js's EventEmitter to Python with a few tricks of its own" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "pyee-12.1.1-py3-none-any.whl", hash = "sha256:18a19c650556bb6b32b406d7f017c8f513aceed1ef7ca618fb65de7bd2d347ef"}, + {file = "pyee-12.1.1.tar.gz", hash = "sha256:bbc33c09e2ff827f74191e3e5bbc6be7da02f627b7ec30d86f5ce1a6fb2424a3"}, +] + +[package.dependencies] +typing-extensions = "*" + +[package.extras] +dev = ["black", "build", "flake8", "flake8-black", "isort", "jupyter-console", "mkdocs", "mkdocs-include-markdown-plugin", "mkdocstrings[python]", "pytest", "pytest-asyncio", "pytest-trio", "sphinx", "toml", "tox", "trio", "trio", "trio-typing", "twine", "twisted", "validate-pyproject[all]"] + [[package]] name = "pyflakes" version = "3.2.0" @@ -3126,4 +3252,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<3.14" -content-hash = "3b843ac3af622ab2801fa34a523dedba241688a13fe9321f8a084eef235b7c71" +content-hash = "46f4b5e4c32ffe2d06dd23bfbcf450b7c4edadd3538749df59cd311eb51c5ff7" diff --git a/pyproject.toml b/pyproject.toml index d57df0e..7f2fd6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ cachetools = "^5.3.3" pyyaml = "^6.0.1" tiktoken = {version = "^0.8.0", python = ">=3.9,<3.13"} python-magic = "^0.4.27" +playwright = "^1.50.0" [tool.poetry.dev-dependencies] flake8 = "^7.1.1" diff --git a/tests/models/gdp_contract.py b/tests/models/gdp_contract.py index c1c2228..dbd044d 100644 --- a/tests/models/gdp_contract.py +++ b/tests/models/gdp_contract.py @@ 
-21,11 +21,21 @@ class RegionData(Contract): provinces: List[ProvinceData] = Field(default_factory=list) class CountryData(Contract): - country: str - total_gdp_million: Optional[float] = Field(None, description="Total GDP (€ million)") - regions: List[RegionData] = Field(default_factory=list, description="Make sure to ignore Extra-regio*/Extra-region") + country: str = Field( + ..., + description="Country name as it appears in the PDF. IMPORTANT: Extract this value from every page and aggregate unique entries, not just the first occurrence." + ) + total_gdp_million: Optional[float] = Field( + None, + description="Total GDP (€ million) for the country, using the value from any page in the document." + ) + regions: List[RegionData] = Field( + default_factory=list, + description="List of regions for the country. Aggregate all regions from every page and ignore any formatting variations like 'Extra-regio*/Extra-region'." + ) class EUData(Contract): + thinking: str = Field(None, description="Think step by step. You have 2 pages; don't forget to add them.") eu_total_gdp_million_27: float = Field(None, description="EU27 Total GDP (€ million)") eu_total_gdp_million_28: float = Field(None, description="EU28 Total GDP (€ million)") - countries: List[CountryData] \ No newline at end of file + countries: List[CountryData] = Field(None, description="List of countries. Make sure you add all countries from every page, not just the first one.") \ No newline at end of file diff --git a/tests/models/handbook_contract.py b/tests/models/handbook_contract.py new file mode 100644 index 0000000..f8f8732 --- /dev/null +++ b/tests/models/handbook_contract.py @@ -0,0 +1,4 @@ +from pydantic import BaseModel + +class HandbookContract(BaseModel): + title: str \ No newline at end of file diff --git a/tests/test_document_loader_docling.py b/tests/test_document_loader_docling.py index 2a0e4ce..c626ea3 100644 --- a/tests/test_document_loader_docling.py +++ b/tests/test_document_loader_docling.py @@ -234,13 +234,17 @@ def test_title_extraction(self): def test_url_loading(self, loader): """Test loading from a URL for Docling loader.""" - url = "https://www.handbook.fca.org.uk/handbook/BCOBS/2/?view=chapter" + loader = DocumentLoaderDocling() + url = "https://www.handbook.fca.org.uk/handbook/BCOBS/2A/?view=chapter" # Ensure the loader recognizes and can handle a URL + loader.set_vision_mode(True) assert loader.can_handle(url) is True - + pages = loader.load(url) assert isinstance(pages, list) assert len(pages) > 0 for page in pages: assert "content" in page + assert "images" in page + assert len(page["images"]) == 3 assert isinstance(page["content"], str) \ No newline at end of file diff --git a/tests/test_document_loader_markitdown.py b/tests/test_document_loader_markitdown.py index 1d3944e..db95dc3 100644 --- a/tests/test_document_loader_markitdown.py +++ b/tests/test_document_loader_markitdown.py @@ -159,23 +159,22 @@ def test_page_separator_splitting(self): loader = DocumentLoaderMarkItDown(config) pages = loader.load(bulk_pdf_path) - # Verify we get exactly 3 pages + # Verify we get exactly 2 pages (as per current expectations) assert len(pages) == 2, f"Expected 2 pages, got {len(pages)}" def test_url_loading(self, loader): """Test loading from a URL for MarkItDown loader.""" - url = "https://www.handbook.fca.org.uk/handbook/BCOBS/2/?view=chapter" + url = "https://www.handbook.fca.org.uk/handbook/BCOBS/2A/?view=chapter" # Verify that the loader accepts the URL as a valid source. 
+ loader.set_vision_mode(True) assert loader.can_handle(url) is True pages = loader.load(url) assert isinstance(pages, list) assert len(pages) > 0 for page in pages: + # This test expects the URL branch to return a page with "images" if vision mode is enabled. assert "content" in page - assert isinstance(page["content"], str) - - def test_can_handle_url(self, loader): - """Test that MarkItDown loader correctly identifies URL sources.""" - url = "https://www.handbook.fca.org.uk/handbook/BCOBS/2/?view=chapter" - assert loader.can_handle(url) is True \ No newline at end of file + assert "images" in page + assert len(page["images"]) == 3 + assert isinstance(page["content"], str) \ No newline at end of file diff --git a/tests/test_extractor.py b/tests/test_extractor.py index 8976a19..7444ecd 100644 --- a/tests/test_extractor.py +++ b/tests/test_extractor.py @@ -16,6 +16,9 @@ import pytest import numpy as np from litellm import embedding +from extract_thinker.document_loader.document_loader_docling import DocumentLoaderDocling +from tests.models.handbook_contract import HandbookContract + load_dotenv() cwd = os.getcwd() @@ -190,7 +193,7 @@ def test_pagination_handler(): test_file_path = os.path.join(os.getcwd(), "tests", "files", "Regional_GDP_per_capita_2018_2.pdf") extractor = Extractor() - extractor.load_document_loader(DocumentLoaderPdfPlumber()) + extractor.load_document_loader(DocumentLoaderDocling()) extractor.load_llm("gpt-4o") # Create and run both extractions in parallel @@ -405,3 +408,22 @@ def test_extract_with_pydanticai_backend(): except ImportError: pytest.skip("pydantic-ai not installed") + +def test_extract_from_url_docling_and_gpt4o_mini(): + """ + Test extraction from a URL using the Docling document loader and gpt-4o-mini LLM. + The test asserts that the extracted title is as expected. + """ + url = "https://www.handbook.fca.org.uk/handbook/BCOBS/2A/?view=chapter" + + # Initialize the extractor, load the Docling loader and the gpt-4o-mini LLM + extractor = Extractor() + extractor.load_document_loader(DocumentLoaderDocling()) + extractor.load_llm("gpt-4o-mini") + + # Act: Extract the document using the specified URL and the HandbookContract + result = extractor.extract(url, HandbookContract) + + # Assert: Verify that the extracted title matches the expected value. + expected_title = "BCOBS 2A.1 Restriction on marketing or providing an optional product for which a fee is payable" + assert result.title == expected_title \ No newline at end of file
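Usage sketch for the URL support introduced in this diff. It is illustrative only: the contract model and URL are taken from the tests above, the top-level `Extractor` import path is assumed from the package root, and vision mode requires Playwright (`pip install playwright` followed by `playwright install`).

```python
from pydantic import BaseModel

from extract_thinker import Extractor
from extract_thinker.document_loader.document_loader_docling import DocumentLoaderDocling


class HandbookContract(BaseModel):
    title: str


# URL sources are detected via _is_url(); with vision mode enabled, the base
# DocumentLoader captures a full-page Playwright screenshot, splits it into
# vertical chunks, and exposes them under the "images" key of page 0.
loader = DocumentLoaderDocling()
loader.set_vision_mode(True)
loader.set_screenshot_timeout(2000)  # wait 2000 ms after page load before capturing

extractor = Extractor()
extractor.load_document_loader(loader)
extractor.load_llm("gpt-4o-mini")

url = "https://www.handbook.fca.org.uk/handbook/BCOBS/2A/?view=chapter"
result = extractor.extract(url, HandbookContract)
print(result.title)
```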