From 8345b2208db7ca959a81e54a9d719b40113f50ff Mon Sep 17 00:00:00 2001
From: Gabriele Venturi <lele.venturi@gmail.com>
Date: Thu, 16 Nov 2023 00:08:26 +0100
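Subject: [PATCH] refactor: remove deprecated PandasAI class

Remove the `PandasAI` class, which has been deprecated since v1.0 in
favour of `SmartDataframe`, and delete its dedicated test module.

`pandasai/__init__.py` no longer imports `AbstractPrompt`, so tests now
import it from `pandasai.prompts` and use `SmartDataframe` directly.
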
---
 pandasai/__init__.py                    | 227 ------------------------
 tests/helpers/test_openai_info.py       | 124 +++++++------
 tests/llms/test_huggingface_text_gen.py |   2 +-
 tests/test_pandasai.py                  |  68 -------
 4 files changed, 69 insertions(+), 352 deletions(-)
 delete mode 100644 tests/test_pandasai.py

diff --git a/pandasai/__init__.py b/pandasai/__init__.py
index c92c0db88..e07391285 100644
--- a/pandasai/__init__.py
+++ b/pandasai/__init__.py
@@ -1,48 +1,11 @@
 # -*- coding: utf-8 -*-
 """
 PandasAI is a wrapper around a LLM to make dataframes conversational
-
-This module includes the implementation of basis  PandasAI class with methods to run
-the LLMs models on Pandas dataframes. Following LLMs are implemented so far.
-
-Example:
-
-    This module is the Entry point of the `pandasai` package. Following is an example
-    of how to use this Class.
-
-    ```python
-    import pandas as pd
-    from pandasai import PandasAI
-
-    # Sample DataFrame
-    df = pd.DataFrame({
-        "country": ["United States", "United Kingdom", "France", "Germany", "Italy",
-        "Spain", "Canada", "Australia", "Japan", "China"],
-        "gdp": [19294482071552, 2891615567872, 2411255037952, 3435817336832,
-        1745433788416, 1181205135360, 1607402389504, 1490967855104, 4380756541440,
-        14631844184064],
-        "happiness_index": [6.94, 7.16, 6.66, 7.07, 6.38, 6.4, 7.23, 7.22, 5.87, 5.12]
-    })
-
-    # Instantiate a LLM
-    from pandasai.llm.openai import OpenAI
-    llm = OpenAI(api_token="YOUR_API_TOKEN")
-
-    pandas_ai = PandasAI(llm)
-    pandas_ai(df, prompt='Which are the 5 happiest countries?')
-
-    ```
 """
-import warnings
-from typing import List, Optional, Union, Dict, Type
-import uuid
 import importlib.metadata
 
-import pandas as pd
 from .smart_dataframe import SmartDataframe
 from .smart_datalake import SmartDatalake
-from .prompts.base import AbstractPrompt
-from .schemas.df_config import Config
 from .helpers.cache import Cache
 from .agent import Agent
 from .skills import skill
@@ -50,195 +13,6 @@
 __version__ = importlib.metadata.version(__package__ or __name__)
 
 
-class PandasAI:
-    """
-    PandasAI is a wrapper around a LLM to make dataframes conversational.
-
-
-    This is an entry point of `pandasai` object. This class consists of methods
-    to interface the LLMs with Pandas     dataframes. A pandas dataframe metadata i.e.
-    df.head() and prompt is passed on to chosen LLMs API end point to generate a Python
-    code to answer the questions asked. The resultant python code is run on actual data
-    and answer is converted into a conversational form.
-
-    Note:
-        Do not include the `self` parameter in the ``Args`` section.
-    Args:
-        _llm (obj): LLMs option to be used for API access
-        _verbose (bool, optional): To show the intermediate outputs e.g. python code
-        generated and execution step on the prompt. Default to False
-        _enforce_privacy (bool, optional): Do not display the data on prompt in case of
-        Sensitive data. Default to False
-        _max_retries (int, optional): max no. of tries to generate code on failure.
-        Default to 3
-        _original_instructions (dict, optional): The dict of instruction to run. Default
-        to None
-        _cache (Cache, optional): Cache object to store the results. Default to None
-        _enable_cache (bool, optional): Whether to enable cache. Default to True
-        _logger (logging.Logger, optional): Logger object to log the messages. Default
-        to None
-        _logs (List[dict], optional): List of logs to be stored. Default to []
-        _prompt_id (str, optional): Unique ID to differentiate calls. Default to None
-        _additional_dependencies (List[dict], optional): List of additional dependencies
-        to be added. Default to []
-        _custom_whitelisted_dependencies (List[str], optional): List of custom
-        whitelisted dependencies. Default to []
-        last_code_generated (str, optional): Pass last Code if generated. Default to
-        None
-        last_code_executed (str, optional): Pass the last execution / run. Default to
-        None
-        code_output (str, optional): The code output if any. Default to None
-        last_error (str, optional): Error of running code last time. Default to None
-        prompt_id (str, optional): Unique ID to differentiate calls. Default to None
-
-
-    Returns (str): Response to a Question related to Data
-
-    """
-
-    _dl: SmartDatalake = None
-    _config: Union[Config, dict]
-
-    def __init__(
-        self,
-        llm=None,
-        conversational=False,
-        verbose=False,
-        enforce_privacy=False,
-        save_charts=False,
-        save_charts_path="",
-        enable_cache=True,
-        custom_whitelisted_dependencies=None,
-        enable_logging=True,
-        non_default_prompts: Optional[Dict[str, Type[AbstractPrompt]]] = None,
-    ):
-        """
-        __init__ method of the Class PandasAI
-
-        Args:
-            llm (object): LLMs option to be used for API access. Default is None
-            conversational (bool): Whether to return answer in conversational form.
-            Default to False
-            verbose (bool): To show the intermediate outputs e.g. python code
-            generated and execution step on the prompt.  Default to False
-            enforce_privacy (bool): Execute the codes with Privacy Mode ON.
-            Default to False
-            save_charts (bool): Save the charts generated in the notebook.
-            Default to False
-            enable_cache (bool): Enable the cache to store the results.
-            Default to True
-            custom_whitelisted_dependencies (list): List of custom dependencies to
-            be used. Default to None
-            enable_logging (bool): Enable the logging. Default to True
-            non_default_prompts (dict): Mapping from keys to replacement prompt classes.
-            Used to override specific types of prompts. Defaults to None.
-        """
-
-        # configure the logging
-        # noinspection PyArgumentList
-        # https://stackoverflow.com/questions/61226587/pycharm-does-not-recognize-logging-basicconfig-handlers-argument
-
-        warnings.warn(
-            "`PandasAI` (class) is deprecated since v1.0 and will be removed "
-            "in a future release. Please use `SmartDataframe` instead."
-        )
-
-        self._config = Config(
-            conversational=conversational,
-            verbose=verbose,
-            enforce_privacy=enforce_privacy,
-            save_charts=save_charts,
-            save_charts_path=save_charts_path,
-            enable_cache=enable_cache,
-            custom_whitelisted_dependencies=custom_whitelisted_dependencies or [],
-            enable_logging=enable_logging,
-            non_default_prompts=non_default_prompts,
-            llm=llm,
-        )
-
-    def run(
-        self,
-        data_frame: Union[pd.DataFrame, List[pd.DataFrame]],
-        prompt: str,
-        show_code: bool = False,
-        anonymize_df: bool = True,
-        use_error_correction_framework: bool = True,
-    ) -> Union[str, pd.DataFrame]:
-        """
-        Run the PandasAI to make Dataframes Conversational.
-
-        Args:
-            data_frame (Union[pd.DataFrame, List[pd.DataFrame]]): A pandas Dataframe
-            prompt (str): A prompt to query about the Dataframe
-            show_code (bool): To show the intermediate python code generated on the
-            prompt. Default to False
-            anonymize_df (bool): Running the code with Sensitive Data. Default to True
-            use_error_correction_framework (bool): Turn on Error Correction mechanism.
-            Default to True
-
-        Returns (str): Answer to the Input Questions about the DataFrame
-
-        """
-
-        new_config = self._config.dict()
-        new_config["show_code"] = show_code
-        new_config["anonymize_df"] = anonymize_df
-        new_config["use_error_correction_framework"] = use_error_correction_framework
-
-        config = Config(**new_config).dict()
-
-        if not isinstance(data_frame, list):
-            data_frame = [data_frame]
-
-        self._dl = SmartDatalake(data_frame, config)
-        return self._dl.chat(prompt)
-
-    def __call__(
-        self,
-        data_frame: Union[pd.DataFrame, List[pd.DataFrame]],
-        prompt: str,
-        show_code: bool = False,
-        anonymize_df: bool = True,
-        use_error_correction_framework: bool = True,
-    ) -> Union[str, pd.DataFrame]:
-        """
-        __call__ method of PandasAI class. It calls the `run` method.
-
-        Args:
-            data_frame:
-            prompt:
-            show_code:
-            anonymize_df:
-            use_error_correction_framework:
-
-        Returns (str): Answer to the Input Questions about the DataFrame.
-
-        """
-
-        return self.run(
-            data_frame,
-            prompt,
-            show_code,
-            anonymize_df,
-            use_error_correction_framework,
-        )
-
-    @property
-    def logs(self) -> List[dict[str, str]]:
-        """Return the logs"""
-        return [] if self._dl is None else self._dl.logs
-
-    @property
-    def last_prompt_id(self) -> uuid.UUID:
-        """Return the id of the last prompt that was run."""
-        return None if self._dl is None else self._dl.last_prompt_id
-
-    @property
-    def last_prompt(self) -> str:
-        """Return the last prompt that was executed."""
-        return None if self._dl is None else self._dl.last_prompt
-
-
 def clear_cache(filename: str = None):
     """Clear the cache"""
     cache = Cache(filename or "cache_db")
@@ -246,7 +20,6 @@ def clear_cache(filename: str = None):
 
 
 __all__ = [
-    "PandasAI",
     "SmartDataframe",
     "SmartDatalake",
     "Agent",
diff --git a/tests/helpers/test_openai_info.py b/tests/helpers/test_openai_info.py
index a5ae2a6e2..7d6d5169e 100644
--- a/tests/helpers/test_openai_info.py
+++ b/tests/helpers/test_openai_info.py
@@ -1,7 +1,7 @@
 import pytest
-import openai
 
-from pandasai import PandasAI
+
+from pandasai import SmartDataframe
 from pandasai.helpers import (
     OpenAICallbackHandler,
     get_openai_callback,
@@ -27,11 +27,13 @@ class TestOpenAIInfo:
     def test_handler(self, handler: OpenAICallbackHandler) -> None:
         response = OpenAIObject(
             {
-                "usage": OpenAIObject({
-                    "prompt_tokens": 2,
-                    "completion_tokens": 1,
-                    "total_tokens": 3,
-                }),
+                "usage": OpenAIObject(
+                    {
+                        "prompt_tokens": 2,
+                        "completion_tokens": 1,
+                        "total_tokens": 3,
+                    }
+                ),
                 "model": "gpt-35-turbo",
             }
         )
@@ -45,11 +47,13 @@ def test_handler(self, handler: OpenAICallbackHandler) -> None:
     def test_handler_unknown_model(self, handler: OpenAICallbackHandler) -> None:
         response = OpenAIObject(
             {
-                "usage": OpenAIObject({
-                    "prompt_tokens": 2,
-                    "completion_tokens": 1,
-                    "total_tokens": 3,
-                }),
+                "usage": OpenAIObject(
+                    {
+                        "prompt_tokens": 2,
+                        "completion_tokens": 1,
+                        "total_tokens": 3,
+                    }
+                ),
                 "model": "foo-bar",
             }
         )
@@ -66,20 +70,20 @@ def test_handler_unknown_model(self, handler: OpenAICallbackHandler) -> None:
         [
             ("gpt-3.5-turbo", 0.003),
             (
-                    "gpt-3.5-turbo-0613",
-                    0.003,
+                "gpt-3.5-turbo-0613",
+                0.003,
             ),
             (
-                    "gpt-3.5-turbo-16k-0613",
-                    0.003,
+                "gpt-3.5-turbo-16k-0613",
+                0.003,
             ),
             (
-                    "gpt-3.5-turbo-1106",
-                    0.003,
+                "gpt-3.5-turbo-1106",
+                0.003,
             ),
             (
-                    "gpt-3.5-turbo-16k",
-                    0.003,
+                "gpt-3.5-turbo-16k",
+                0.003,
             ),
             ("gpt-4", 0.09),
             ("gpt-4-0613", 0.09),
@@ -89,15 +93,17 @@ def test_handler_unknown_model(self, handler: OpenAICallbackHandler) -> None:
         ],
     )
     def test_handler_openai(
-            self, handler: OpenAICallbackHandler, model_name: str, expected_cost: float
+        self, handler: OpenAICallbackHandler, model_name: str, expected_cost: float
     ) -> None:
         response = OpenAIObject(
             {
-                "usage": OpenAIObject({
-                    "prompt_tokens": 1000,
-                    "completion_tokens": 1000,
-                    "total_tokens": 2000,
-                }),
+                "usage": OpenAIObject(
+                    {
+                        "prompt_tokens": 1000,
+                        "completion_tokens": 1000,
+                        "total_tokens": 2000,
+                    }
+                ),
                 "model": model_name,
             }
         )
@@ -109,16 +115,16 @@ def test_handler_openai(
         [
             ("gpt-35-turbo", 0.0035),
             (
-                    "gpt-35-turbo-0613",
-                    0.0035,
+                "gpt-35-turbo-0613",
+                0.0035,
             ),
             (
-                    "gpt-35-turbo-16k-0613",
-                    0.007,
+                "gpt-35-turbo-16k-0613",
+                0.007,
             ),
             (
-                    "gpt-35-turbo-16k",
-                    0.007,
+                "gpt-35-turbo-16k",
+                0.007,
             ),
             ("gpt-4", 0.09),
             ("gpt-4-0613", 0.09),
@@ -127,15 +133,17 @@ def test_handler_openai(
         ],
     )
     def test_handler_azure_openai(
-            self, handler: OpenAICallbackHandler, model_name: str, expected_cost: float
+        self, handler: OpenAICallbackHandler, model_name: str, expected_cost: float
     ) -> None:
         response = OpenAIObject(
             {
-                "usage": OpenAIObject({
-                    "prompt_tokens": 1000,
-                    "completion_tokens": 1000,
-                    "total_tokens": 2000,
-                }),
+                "usage": OpenAIObject(
+                    {
+                        "prompt_tokens": 1000,
+                        "completion_tokens": 1000,
+                        "total_tokens": 2000,
+                    }
+                ),
                 "model": model_name,
             }
         )
@@ -150,15 +158,17 @@ def test_handler_azure_openai(
         ],
     )
     def test_handler_finetuned_model(
-            self, handler: OpenAICallbackHandler, model_name: str, expected_cost: float
+        self, handler: OpenAICallbackHandler, model_name: str, expected_cost: float
     ):
         response = OpenAIObject(
             {
-                "usage": OpenAIObject({
-                    "prompt_tokens": 1000,
-                    "completion_tokens": 1000,
-                    "total_tokens": 2000,
-                }),
+                "usage": OpenAIObject(
+                    {
+                        "prompt_tokens": 1000,
+                        "completion_tokens": 1000,
+                        "total_tokens": 2000,
+                    }
+                ),
                 "model": model_name,
             }
         )
@@ -180,18 +190,20 @@ def test_openai_callback(self, mocker):
                     }
                 ],
                 "model": llm.model,
-                "usage": OpenAIObject({
-                    "prompt_tokens": 2,
-                    "completion_tokens": 1,
-                    "total_tokens": 3,
-                }),
+                "usage": OpenAIObject(
+                    {
+                        "prompt_tokens": 2,
+                        "completion_tokens": 1,
+                        "total_tokens": 3,
+                    }
+                ),
             }
         )
         mocker.patch.object(llm.client, "create", return_value=llm_response)
 
-        pandas_ai = PandasAI(llm, enable_cache=False)
+        sdf = SmartDataframe(df, config={"llm": llm, "enable_cache": False})
         with get_openai_callback() as cb:
-            _ = pandas_ai(df, "some question")
+            sdf.chat("some question 1")
             assert cb.total_tokens == 3
             assert cb.prompt_tokens == 2
             assert cb.completion_tokens == 1
@@ -200,14 +212,14 @@ def test_openai_callback(self, mocker):
         total_tokens = cb.total_tokens
 
         with get_openai_callback() as cb:
-            pandas_ai(df, "some question")
-            pandas_ai(df, "some question")
+            sdf.chat("some question 2")
+            sdf.chat("some question 3")
 
         assert cb.total_tokens == total_tokens * 2
 
         with get_openai_callback() as cb:
-            pandas_ai(df, "some question")
-            pandas_ai(df, "some question")
-            pandas_ai(df, "some question")
+            sdf.chat("some question 4")
+            sdf.chat("some question 5")
+            sdf.chat("some question 6")
 
         assert cb.total_tokens == total_tokens * 3
diff --git a/tests/llms/test_huggingface_text_gen.py b/tests/llms/test_huggingface_text_gen.py
index 1925b1561..140e6d882 100644
--- a/tests/llms/test_huggingface_text_gen.py
+++ b/tests/llms/test_huggingface_text_gen.py
@@ -1,5 +1,5 @@
 """Unit tests for the LLaMa2TextGen LLM class"""
-from pandasai import AbstractPrompt
+from pandasai.prompts import AbstractPrompt
 from pandasai.llm import HuggingFaceTextGen
 
 
diff --git a/tests/test_pandasai.py b/tests/test_pandasai.py
deleted file mode 100644
index 0ebcb8168..000000000
--- a/tests/test_pandasai.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import pandas as pd
-from pandasai import PandasAI, SmartDatalake
-from pandasai.llm.fake import FakeLLM
-import pytest
-from unittest.mock import patch
-
-
-class TestPandasAI:
-    @pytest.fixture
-    def llm(self):
-        return FakeLLM()
-
-    @pytest.fixture
-    def df(self):
-        return pd.DataFrame({"a": [1], "b": [4]})
-
-    @pytest.fixture
-    def pai(self, llm):
-        return PandasAI(
-            llm=llm,
-            enable_cache=False,
-        )
-
-    def test_init(self, pai, llm):
-        assert pai._config.llm == llm
-        assert pai._config.custom_prompts == {}
-        assert pai._config.custom_whitelisted_dependencies == []
-        assert pai._config.enable_cache is False
-        assert pai._config.use_error_correction_framework is True
-        assert pai._config.enforce_privacy is False
-        assert pai._config.save_logs is True
-        assert pai._config.save_charts is False
-        assert pai._config.save_charts_path == ""
-        assert pai._config.verbose is False
-        assert pai._config.max_retries == 3
-
-    def test_logs(self, pai):
-        assert pai.logs == []
-
-    def test_last_prompt_id(self, pai):
-        assert pai.last_prompt_id is None
-
-    def test_last_prompt(self, pai):
-        assert pai.last_prompt is None
-
-    @patch.object(SmartDatalake, "chat", return_value="Answer")
-    def test_run(self, _mocked_method, pai, df):
-        assert pai.run(df, "Question") == "Answer"
-
-    @patch.object(SmartDatalake, "chat", side_effect=Exception("Unexpected error"))
-    def test_run_with_exception(self, _mocked_method, pai, df):
-        with pytest.raises(Exception) as e_info:
-            pai.run(df, "Question")
-        assert str(e_info.value) == "Unexpected error"
-
-    @patch.object(SmartDatalake, "chat", side_effect=Exception("Unexpected error"))
-    def test_call_with_exception(self, _mocked_method, pai, df):
-        with pytest.raises(Exception) as e_info:
-            pai(df, "Question")
-        assert str(e_info.value) == "Unexpected error"
-
-    def test_run_with_invalid_arguments(self, pai):
-        with pytest.raises(ValueError) as e_info:
-            pai.run(0, "Question")
-        assert (
-            str(e_info.value)
-            == "Invalid input data. Must be a Pandas or Polars dataframe."
-        )