diff --git a/.DS_Store b/.DS_Store
index 0269d10..fd450db 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/docs/.DS_Store b/docs/.DS_Store
index 5c2ecda..2abecf5 100644
Binary files a/docs/.DS_Store and b/docs/.DS_Store differ
diff --git a/docs/build/html/_sources/api/_autosummary/ragoon.web_rag.WebRAG.rst b/docs/build/html/_sources/api 2/_autosummary/ragoon.web_rag.WebRAG.rst
similarity index 100%
rename from docs/build/html/_sources/api/_autosummary/ragoon.web_rag.WebRAG.rst
rename to docs/build/html/_sources/api 2/_autosummary/ragoon.web_rag.WebRAG.rst
diff --git a/docs/source/.DS_Store b/docs/source/.DS_Store
index 542cea5..44dbe91 100644
Binary files a/docs/source/.DS_Store and b/docs/source/.DS_Store differ
diff --git a/docs/source/_static/.DS_Store b/docs/source/_static/.DS_Store
index da60206..7a87c4d 100644
Binary files a/docs/source/_static/.DS_Store and b/docs/source/_static/.DS_Store differ
diff --git a/docs/source/conf.py b/docs/source/conf.py
index dbd75e7..9009db9 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -16,7 +16,7 @@
 project = 'RAGoon'
 copyright = '2024, Louis Brulé Naudet'
 author = 'Louis Brulé Naudet'
-release = '0.0.13'
+release = '0.0.14'
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/pyproject.toml b/pyproject.toml
index df4f433..ed1dc79 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ragoon"
-version = "0.0.13"
+version = "0.0.14"
 description = "RAGoon : High level library for batched embeddings generation, blazingly-fast web-based RAG and quantized indexes processing ⚡"
 readme = "README.md"
 requires-python = ">=3.9"
diff --git a/src/.DS_Store b/src/.DS_Store
index 800fdfe..145de9d 100644
Binary files a/src/.DS_Store and b/src/.DS_Store differ
diff --git a/src/ragoon/__init__.py b/src/ragoon/__init__.py
index a0c5139..c06a0a7 100644
--- a/src/ragoon/__init__.py
+++ b/src/ragoon/__init__.py
@@ -6,7 +6,8 @@
 from ragoon.chunks import (
     ChunkMetadata,
-    DatasetChunker
+    DatasetChunker,
+    SemanticTextSplitter
 )
 
 from ragoon.embeddings import (
diff --git a/src/ragoon/chunks.py b/src/ragoon/chunks.py
index f3610da..b890f3e 100644
--- a/src/ragoon/chunks.py
+++ b/src/ragoon/chunks.py
@@ -38,6 +38,7 @@
     DatasetDict
 )
 
+from huggingface_hub import InferenceClient
 from tqdm import tqdm
 from transformers import AutoTokenizer
 
@@ -46,6 +47,381 @@
 logger = Logger()
 
 
+class SemanticTextSplitter:
+    """
+    A class for splitting text into semantically coherent sections using a language model.
+
+    This class leverages the Hugging Face Inference API to generate splits in the input text,
+    and then processes the result to return a list of split text sections. It is designed
+    to work with various language models available through the Hugging Face platform.
+
+    Parameters
+    ----------
+    model : str, optional
+        The name or path of the Hugging Face model to use for text splitting.
+        This should be a model capable of text generation tasks, such as GPT-based models.
+        Default is 'meta-llama/Meta-Llama-3.1-70B-Instruct'.
+
+    token : str, optional
+        The Hugging Face API token for authentication. If not provided, the class will
+        attempt to use the token stored in the Hugging Face CLI configuration.
+
+    split_token : str, optional
+        The token used to split the text (default is '<|split|>'). This token will be
+        inserted by the model to indicate where the text should be split.
+
+    system_prompt : str, optional
+        The system prompt to use for the model. If not provided, a default prompt
+        will be used, which instructs the model on how to split the text.
+
+    max_tokens : int, optional
+        The maximum number of tokens to generate in the model's response (default is 4096).
+        This limit applies to the entire response, including the input prompt.
+
+    stream : bool, optional
+        Whether to stream the model's output (default is True). When True, the output
+        will be printed as it's generated. When False, the output will be returned all at once.
+
+    Attributes
+    ----------
+    client : InferenceClient
+        The Hugging Face Inference API client used to communicate with the model.
+
+    split_token : str
+        The token used to split the text.
+
+    system_prompt : str
+        The system prompt used to instruct the model on how to split the text.
+
+    max_tokens : int
+        The maximum number of tokens to generate in the model's response.
+
+    stream : bool
+        Whether to stream the model's output.
+
+    Methods
+    -------
+    completion(text: str) -> str
+        Calls the language model to process the input text.
+
+    split(text: str) -> List[str]
+        Splits the input text into semantically coherent sections.
+
+    Raises
+    ------
+    ValueError
+        If the model name is not provided during initialization.
+
+    RuntimeError
+        If there's an error calling the Hugging Face Inference API.
+
+    Examples
+    --------
+    >>> # Ensure you have set up your Hugging Face token using `huggingface-cli login`
+    >>> splitter = SemanticTextSplitter(
+    ...     model="meta-llama/Llama-2-70b-chat-hf",
+    ...     token=None  # This will use your stored Hugging Face token
+    ... )
+    >>> text = '''
+    ... The Python programming language, created by Guido van Rossum,
+    ... has become one of the most popular languages in the world.
+    ... Its simplicity and readability make it an excellent choice for beginners.
+    ... Meanwhile, data science has emerged as a crucial field in the modern world.
+    ... Python's extensive libraries, such as NumPy and Pandas, have made it
+    ... a favorite among data scientists and analysts.
+    ... '''
+    >>> result = splitter.split(text)
+    >>> for section in result:
+    ...     print(f"Section: {section}\\n")
+    Section: The Python programming language, created by Guido van Rossum,
+    has become one of the most popular languages in the world.
+    Its simplicity and readability make it an excellent choice for beginners.
+
+    Section: Meanwhile, data science has emerged as a crucial field in the modern world.
+    Python's extensive libraries, such as NumPy and Pandas, have made it
+    a favorite among data scientists and analysts.
+
+    Notes
+    -----
+    - The quality of the text splitting depends on the capabilities of the chosen language model.
+    - The system prompt plays a crucial role in guiding the model's behavior. Customizing it
+      can lead to different splitting results.
+    - When using streamed output, the results are printed to the console in real-time,
+      which can be useful for monitoring long-running splits.
+    - The split token ('<|split|>' by default) should be chosen carefully to avoid conflicts
+      with the content of the text being split.
+
+    See Also
+    --------
+    huggingface_hub.InferenceClient : The client used to interact with Hugging Face models.
+    """
+
+    def __init__(
+        self,
+        model: Optional[str] = "meta-llama/Meta-Llama-3.1-70B-Instruct",
+        token: Optional[str] = None,
+        split_token: str = "<|split|>",
+        system_prompt: Optional[str] = None,
+        max_tokens: int = 4096,
+        stream: bool = True,
+    ):
+        """
+        Initialize the SemanticTextSplitter.
+
+        Parameters
+        ----------
+        model : str, optional
+            The name or path of the Hugging Face model to use for text splitting.
+            Default is 'meta-llama/Meta-Llama-3.1-70B-Instruct'.
+
+        token : str, optional
+            The Hugging Face API token for authentication.
+
+        split_token : str, optional
+            The token used to split the text (default is '<|split|>').
+
+        system_prompt : str, optional
+            The system prompt to use for the model. If None, a default prompt is used.
+
+        max_tokens : int, optional
+            The maximum number of tokens to generate (default is 4096).
+
+        stream : bool, optional
+            Whether to stream the model's output (default is True).
+
+        Raises
+        ------
+        ValueError
+            If the model name is not provided.
+        """
+        if not model:
+            raise ValueError("Model name must be provided.")
+
+        self.client = InferenceClient(model, token=token)
+        self.split_token = split_token
+        self.max_tokens = max_tokens
+        self.stream = stream
+
+        self.system_prompt = system_prompt or self._default_system_prompt()
+
+    def completion(self, text: str) -> str:
+        """
+        Call the language model to process the input text.
+
+        This method sends the input text to the language model via the Hugging Face
+        Inference API and returns the model's output.
+
+        Parameters
+        ----------
+        text : str
+            The input text to be processed by the model.
+
+        Returns
+        -------
+        str
+            The processed text returned by the model, potentially including split tokens.
+
+        Raises
+        ------
+        RuntimeError
+            If there's an error calling the Hugging Face Inference API.
+
+        Notes
+        -----
+        - If streaming is enabled, the method will print the output in real-time
+          and return the complete output as a string.
+        - If streaming is disabled, the method will return the complete output
+          after the model finishes processing.
+        """
+        try:
+            messages = [
+                {"role": "system", "content": self.system_prompt},
+                {"role": "user", "content": text},
+            ]
+
+            if self.stream:
+                return self._stream_completion(messages)
+            else:
+                return self._non_stream_completion(messages)
+
+        except Exception as e:
+            raise RuntimeError(f"Error calling Hugging Face Inference API: {str(e)}") from e
+
+    def split(self, text: str) -> List[str]:
+        """
+        Split the input text into semantically coherent sections.
+
+        This method sends the input text to the language model for processing,
+        then splits the returned text based on the specified split token.
+
+        Parameters
+        ----------
+        text : str
+            The input text to be split.
+
+        Returns
+        -------
+        List[str]
+            A list of strings, each representing a semantically coherent section
+            of the input text.
+
+        Examples
+        --------
+        >>> splitter = SemanticTextSplitter(
+        ...     model="meta-llama/Llama-2-70b-chat-hf",
+        ...     token="your_hf_token_here"
+        ... )
+        >>> text = '''
+        ... Machine learning is a subset of artificial intelligence
+        ... that focuses on the development of algorithms and statistical models.
+        ... It enables computer systems to improve their performance on a specific task
+        ... through experience, without being explicitly programmed.
+        ... On the other hand, deep learning is a subset of machine learning
+        ... that uses artificial neural networks with multiple layers
+        ... to progressively extract higher-level features from raw input.
+        ... '''
+        >>> result = splitter.split(text)
+        >>> for idx, section in enumerate(result, 1):
+        ...     print(f"Section {idx}:\\n{section}\\n")
+        Section 1:
+        Machine learning is a subset of artificial intelligence
+        that focuses on the development of algorithms and statistical models.
+        It enables computer systems to improve their performance on a specific task
+        through experience, without being explicitly programmed.
+
+        Section 2:
+        On the other hand, deep learning is a subset of machine learning
+        that uses artificial neural networks with multiple layers
+        to progressively extract higher-level features from raw input.
+
+        Notes
+        -----
+        - The quality of the splitting depends on the language model's understanding
+          of the text and its ability to identify semantic boundaries.
+        - The method uses the `completion` method internally to process the text,
+          so any streaming behavior will occur during this step.
+        - Empty sections (after stripping whitespace) are automatically removed
+          from the final output.
+        """
+        processed_text = self.completion(text=text)
+
+        return [
+            section.strip()
+            for section in re.split(re.escape(self.split_token), processed_text)
+            if section.strip()
+        ]
+
+    def _default_system_prompt(self) -> str:
+        """
+        Generate the default system prompt for the language model.
+
+        This method creates a detailed instruction set for the language model,
+        guiding it on how to split the input text into semantically coherent sections.
+
+        Returns
+        -------
+        str
+            The default system prompt as a string.
+
+        Notes
+        -----
+        - This method is called internally if no custom system prompt is provided
+          during initialization.
+        - The prompt includes specific instructions on how to use the split token,
+          handle different types of text, and maintain the integrity of the original content.
+        """
+        return f"""You are an assistant specialized in analyzing and dividing complex texts. Your task is to divide the provided text into semantically coherent sections, inserting the '{self.split_token}' tag between each distinct section. Follow these guidelines:
+
+- Carefully analyze the semantic content of the text.
+- Identify changes in theme, subject, or major concept.
+- Insert the '{self.split_token}' tag at each point where you detect a significant change in semantic content.
+- Ensure that each resulting section is self-contained and thematically coherent.
+- Avoid dividing the text into sections that are too small or too numerous. Aim for divisions that capture complete ideas or concepts.
+- Do not modify the original text apart from adding the '{self.split_token}' tags.
+- Do not add explanations, comments, or additional metadata.
+- If the text already contains natural divisions (such as paragraphs), use them as a guide, but don't hesitate to divide further if necessary for semantic coherence.
+- Be consistent in your approach to division throughout the text.
+- If the text is short or deals with a single coherent subject, it is acceptable not to divide it at all.
+- Titles and subtitles should not be divided; they only provide you with additional context.
+- Correct formatting inconsistencies in the textual content if necessary, without modifying the text itself.
+- Follow these instructions for all languages, and always respond in the language of the provided text.
+
+Your goal is to produce a version of the text divided in a way that facilitates subsequent labeling and analysis by language models. Focus solely on dividing the text and correcting the format according to these instructions, without adding any additional content."""
+
+    def _stream_completion(self, messages: List[dict]) -> str:
+        """
+        Process the model's output in streaming mode.
+
+        This method handles the streaming of the model's output, printing it
+        in real-time and accumulating it into a single string.
+
+        Parameters
+        ----------
+        messages : List[dict]
+            A list of message dictionaries to be sent to the model.
+            Each dictionary should have 'role' and 'content' keys.
+
+        Returns
+        -------
+        str
+            The complete output from the model as a single string.
+
+        Notes
+        -----
+        - This method is called internally by the `completion` method when
+          streaming is enabled.
+        - It prints each chunk of the model's output as it's received, providing
+          real-time feedback for long-running processes.
+        """
+        message = ""
+
+        for chunk in self.client.chat_completion(
+            messages=messages,
+            max_tokens=self.max_tokens,
+            stream=True,
+        ):
+            if chunk.choices[0].delta.content is not None:
+                content = chunk.choices[0].delta.content
+                print(content, end="", flush=True)
+                message += content
+
+        return message
+
+    def _non_stream_completion(self, messages: List[dict]) -> str:
+        """
+        Process the model's output in non-streaming mode.
+
+        This method handles the model's output when streaming is disabled,
+        returning the complete response at once.
+
+        Parameters
+        ----------
+        messages : List[dict]
+            A list of message dictionaries to be sent to the model.
+            Each dictionary should have 'role' and 'content' keys.
+
+        Returns
+        -------
+        str
+            The complete output from the model as a single string.
+
+        Notes
+        -----
+        - This method is called internally by the `completion` method when
+          streaming is disabled.
+        - It waits for the model to complete its entire response before returning,
+          which may take longer for large inputs but provides the entire output at once.
+        """
+        response = self.client.chat_completion(
+            messages=messages,
+            max_tokens=self.max_tokens,
+            stream=False,
+        )
+
+        return response.choices[0].message.content
+
+
 @dataclass
 class ChunkMetadata:
     """
diff --git a/tests/.DS_Store b/tests/.DS_Store
index 12a80b9..99549fe 100644
Binary files a/tests/.DS_Store and b/tests/.DS_Store differ
diff --git a/tests/data/.DS_Store b/tests/data/.DS_Store
index 189e44b..689d466 100644
Binary files a/tests/data/.DS_Store and b/tests/data/.DS_Store differ
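
For reviewers, a minimal usage sketch of the SemanticTextSplitter added by this patch. It assumes a Hugging Face token is already stored via `huggingface-cli login` and that the default model is reachable through the Inference API; the sample text is illustrative only and is not taken from the test suite.

    from ragoon import SemanticTextSplitter

    # stream=False returns the model's output in one piece instead of
    # printing it token by token as it is generated.
    splitter = SemanticTextSplitter(
        model="meta-llama/Meta-Llama-3.1-70B-Instruct",
        stream=False,
    )

    # Illustrative input: two unrelated topics the model should separate.
    text = (
        "Rust guarantees memory safety without a garbage collector. "
        "In unrelated news, the Louvre is the world's most visited museum."
    )

    # The model re-emits the text with '<|split|>' markers inserted;
    # split() returns the cleaned, non-empty sections.
    for idx, section in enumerate(splitter.split(text), 1):
        print(f"[{idx}] {section}")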
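
The post-processing inside `split()` is independent of the model: it is a literal split on the marker followed by whitespace cleanup. A self-contained sketch of just that step, using a hypothetical model response so no API call is needed:

    import re

    split_token = "<|split|>"

    # Hypothetical response in which the model has already inserted markers.
    processed_text = (
        "Python is popular for its readability.\n"
        "<|split|>\n"
        "Deep learning extracts features from raw input."
    )

    # Mirrors SemanticTextSplitter.split(): split on the escaped literal
    # marker, strip whitespace, and drop empty sections.
    sections = [
        s.strip()
        for s in re.split(re.escape(split_token), processed_text)
        if s.strip()
    ]

    print(sections)
    # ['Python is popular for its readability.',
    #  'Deep learning extracts features from raw input.']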
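
Because the default system prompt interpolates `self.split_token`, passing a custom `system_prompt` means the marker must be spelled out in that prompt yourself. A sketch under the same stored-token assumption; the marker and prompt wording below are hypothetical, not part of the patch:

    from ragoon import SemanticTextSplitter

    # A custom marker must also appear in the custom prompt, since the
    # default prompt is only generated when system_prompt is None.
    splitter = SemanticTextSplitter(
        split_token="<<<SECTION>>>",
        system_prompt=(
            "Divide the provided text into thematically coherent sections, "
            "inserting '<<<SECTION>>>' between sections. Do not modify the "
            "text itself and do not add any commentary."
        ),
        stream=False,
    )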