From 8345b2208db7ca959a81e54a9d719b40113f50ff Mon Sep 17 00:00:00 2001 From: Gabriele Venturi Date: Thu, 16 Nov 2023 00:08:26 +0100 Subject: [PATCH] refactor: remove PandasAI --- pandasai/__init__.py | 227 ------------------------ tests/helpers/test_openai_info.py | 124 +++++++------ tests/llms/test_huggingface_text_gen.py | 2 +- tests/test_pandasai.py | 68 ------- 4 files changed, 69 insertions(+), 352 deletions(-) delete mode 100644 tests/test_pandasai.py diff --git a/pandasai/__init__.py b/pandasai/__init__.py index c92c0db88..e07391285 100644 --- a/pandasai/__init__.py +++ b/pandasai/__init__.py @@ -1,48 +1,11 @@ # -*- coding: utf-8 -*- """ PandasAI is a wrapper around a LLM to make dataframes conversational - -This module includes the implementation of basis PandasAI class with methods to run -the LLMs models on Pandas dataframes. Following LLMs are implemented so far. - -Example: - - This module is the Entry point of the `pandasai` package. Following is an example - of how to use this Class. - - ```python - import pandas as pd - from pandasai import PandasAI - - # Sample DataFrame - df = pd.DataFrame({ - "country": ["United States", "United Kingdom", "France", "Germany", "Italy", - "Spain", "Canada", "Australia", "Japan", "China"], - "gdp": [19294482071552, 2891615567872, 2411255037952, 3435817336832, - 1745433788416, 1181205135360, 1607402389504, 1490967855104, 4380756541440, - 14631844184064], - "happiness_index": [6.94, 7.16, 6.66, 7.07, 6.38, 6.4, 7.23, 7.22, 5.87, 5.12] - }) - - # Instantiate a LLM - from pandasai.llm.openai import OpenAI - llm = OpenAI(api_token="YOUR_API_TOKEN") - - pandas_ai = PandasAI(llm) - pandas_ai(df, prompt='Which are the 5 happiest countries?') - - ``` """ -import warnings -from typing import List, Optional, Union, Dict, Type -import uuid import importlib.metadata -import pandas as pd from .smart_dataframe import SmartDataframe from .smart_datalake import SmartDatalake -from .prompts.base import AbstractPrompt -from .schemas.df_config import Config from .helpers.cache import Cache from .agent import Agent from .skills import skill @@ -50,195 +13,6 @@ __version__ = importlib.metadata.version(__package__ or __name__) -class PandasAI: - """ - PandasAI is a wrapper around a LLM to make dataframes conversational. - - - This is an entry point of `pandasai` object. This class consists of methods - to interface the LLMs with Pandas dataframes. A pandas dataframe metadata i.e. - df.head() and prompt is passed on to chosen LLMs API end point to generate a Python - code to answer the questions asked. The resultant python code is run on actual data - and answer is converted into a conversational form. - - Note: - Do not include the `self` parameter in the ``Args`` section. - Args: - _llm (obj): LLMs option to be used for API access - _verbose (bool, optional): To show the intermediate outputs e.g. python code - generated and execution step on the prompt. Default to False - _enforce_privacy (bool, optional): Do not display the data on prompt in case of - Sensitive data. Default to False - _max_retries (int, optional): max no. of tries to generate code on failure. - Default to 3 - _original_instructions (dict, optional): The dict of instruction to run. Default - to None - _cache (Cache, optional): Cache object to store the results. Default to None - _enable_cache (bool, optional): Whether to enable cache. Default to True - _logger (logging.Logger, optional): Logger object to log the messages. Default - to None - _logs (List[dict], optional): List of logs to be stored. Default to [] - _prompt_id (str, optional): Unique ID to differentiate calls. Default to None - _additional_dependencies (List[dict], optional): List of additional dependencies - to be added. Default to [] - _custom_whitelisted_dependencies (List[str], optional): List of custom - whitelisted dependencies. Default to [] - last_code_generated (str, optional): Pass last Code if generated. Default to - None - last_code_executed (str, optional): Pass the last execution / run. Default to - None - code_output (str, optional): The code output if any. Default to None - last_error (str, optional): Error of running code last time. Default to None - prompt_id (str, optional): Unique ID to differentiate calls. Default to None - - - Returns (str): Response to a Question related to Data - - """ - - _dl: SmartDatalake = None - _config: Union[Config, dict] - - def __init__( - self, - llm=None, - conversational=False, - verbose=False, - enforce_privacy=False, - save_charts=False, - save_charts_path="", - enable_cache=True, - custom_whitelisted_dependencies=None, - enable_logging=True, - non_default_prompts: Optional[Dict[str, Type[AbstractPrompt]]] = None, - ): - """ - __init__ method of the Class PandasAI - - Args: - llm (object): LLMs option to be used for API access. Default is None - conversational (bool): Whether to return answer in conversational form. - Default to False - verbose (bool): To show the intermediate outputs e.g. python code - generated and execution step on the prompt. Default to False - enforce_privacy (bool): Execute the codes with Privacy Mode ON. - Default to False - save_charts (bool): Save the charts generated in the notebook. - Default to False - enable_cache (bool): Enable the cache to store the results. - Default to True - custom_whitelisted_dependencies (list): List of custom dependencies to - be used. Default to None - enable_logging (bool): Enable the logging. Default to True - non_default_prompts (dict): Mapping from keys to replacement prompt classes. - Used to override specific types of prompts. Defaults to None. - """ - - # configure the logging - # noinspection PyArgumentList - # https://stackoverflow.com/questions/61226587/pycharm-does-not-recognize-logging-basicconfig-handlers-argument - - warnings.warn( - "`PandasAI` (class) is deprecated since v1.0 and will be removed " - "in a future release. Please use `SmartDataframe` instead." - ) - - self._config = Config( - conversational=conversational, - verbose=verbose, - enforce_privacy=enforce_privacy, - save_charts=save_charts, - save_charts_path=save_charts_path, - enable_cache=enable_cache, - custom_whitelisted_dependencies=custom_whitelisted_dependencies or [], - enable_logging=enable_logging, - non_default_prompts=non_default_prompts, - llm=llm, - ) - - def run( - self, - data_frame: Union[pd.DataFrame, List[pd.DataFrame]], - prompt: str, - show_code: bool = False, - anonymize_df: bool = True, - use_error_correction_framework: bool = True, - ) -> Union[str, pd.DataFrame]: - """ - Run the PandasAI to make Dataframes Conversational. - - Args: - data_frame (Union[pd.DataFrame, List[pd.DataFrame]]): A pandas Dataframe - prompt (str): A prompt to query about the Dataframe - show_code (bool): To show the intermediate python code generated on the - prompt. Default to False - anonymize_df (bool): Running the code with Sensitive Data. Default to True - use_error_correction_framework (bool): Turn on Error Correction mechanism. - Default to True - - Returns (str): Answer to the Input Questions about the DataFrame - - """ - - new_config = self._config.dict() - new_config["show_code"] = show_code - new_config["anonymize_df"] = anonymize_df - new_config["use_error_correction_framework"] = use_error_correction_framework - - config = Config(**new_config).dict() - - if not isinstance(data_frame, list): - data_frame = [data_frame] - - self._dl = SmartDatalake(data_frame, config) - return self._dl.chat(prompt) - - def __call__( - self, - data_frame: Union[pd.DataFrame, List[pd.DataFrame]], - prompt: str, - show_code: bool = False, - anonymize_df: bool = True, - use_error_correction_framework: bool = True, - ) -> Union[str, pd.DataFrame]: - """ - __call__ method of PandasAI class. It calls the `run` method. - - Args: - data_frame: - prompt: - show_code: - anonymize_df: - use_error_correction_framework: - - Returns (str): Answer to the Input Questions about the DataFrame. - - """ - - return self.run( - data_frame, - prompt, - show_code, - anonymize_df, - use_error_correction_framework, - ) - - @property - def logs(self) -> List[dict[str, str]]: - """Return the logs""" - return [] if self._dl is None else self._dl.logs - - @property - def last_prompt_id(self) -> uuid.UUID: - """Return the id of the last prompt that was run.""" - return None if self._dl is None else self._dl.last_prompt_id - - @property - def last_prompt(self) -> str: - """Return the last prompt that was executed.""" - return None if self._dl is None else self._dl.last_prompt - - def clear_cache(filename: str = None): """Clear the cache""" cache = Cache(filename or "cache_db") @@ -246,7 +20,6 @@ def clear_cache(filename: str = None): __all__ = [ - "PandasAI", "SmartDataframe", "SmartDatalake", "Agent", diff --git a/tests/helpers/test_openai_info.py b/tests/helpers/test_openai_info.py index a5ae2a6e2..7d6d5169e 100644 --- a/tests/helpers/test_openai_info.py +++ b/tests/helpers/test_openai_info.py @@ -1,7 +1,7 @@ import pytest -import openai -from pandasai import PandasAI + +from pandasai import SmartDataframe from pandasai.helpers import ( OpenAICallbackHandler, get_openai_callback, @@ -27,11 +27,13 @@ class TestOpenAIInfo: def test_handler(self, handler: OpenAICallbackHandler) -> None: response = OpenAIObject( { - "usage": OpenAIObject({ - "prompt_tokens": 2, - "completion_tokens": 1, - "total_tokens": 3, - }), + "usage": OpenAIObject( + { + "prompt_tokens": 2, + "completion_tokens": 1, + "total_tokens": 3, + } + ), "model": "gpt-35-turbo", } ) @@ -45,11 +47,13 @@ def test_handler(self, handler: OpenAICallbackHandler) -> None: def test_handler_unknown_model(self, handler: OpenAICallbackHandler) -> None: response = OpenAIObject( { - "usage": OpenAIObject({ - "prompt_tokens": 2, - "completion_tokens": 1, - "total_tokens": 3, - }), + "usage": OpenAIObject( + { + "prompt_tokens": 2, + "completion_tokens": 1, + "total_tokens": 3, + } + ), "model": "foo-bar", } ) @@ -66,20 +70,20 @@ def test_handler_unknown_model(self, handler: OpenAICallbackHandler) -> None: [ ("gpt-3.5-turbo", 0.003), ( - "gpt-3.5-turbo-0613", - 0.003, + "gpt-3.5-turbo-0613", + 0.003, ), ( - "gpt-3.5-turbo-16k-0613", - 0.003, + "gpt-3.5-turbo-16k-0613", + 0.003, ), ( - "gpt-3.5-turbo-1106", - 0.003, + "gpt-3.5-turbo-1106", + 0.003, ), ( - "gpt-3.5-turbo-16k", - 0.003, + "gpt-3.5-turbo-16k", + 0.003, ), ("gpt-4", 0.09), ("gpt-4-0613", 0.09), @@ -89,15 +93,17 @@ def test_handler_unknown_model(self, handler: OpenAICallbackHandler) -> None: ], ) def test_handler_openai( - self, handler: OpenAICallbackHandler, model_name: str, expected_cost: float + self, handler: OpenAICallbackHandler, model_name: str, expected_cost: float ) -> None: response = OpenAIObject( { - "usage": OpenAIObject({ - "prompt_tokens": 1000, - "completion_tokens": 1000, - "total_tokens": 2000, - }), + "usage": OpenAIObject( + { + "prompt_tokens": 1000, + "completion_tokens": 1000, + "total_tokens": 2000, + } + ), "model": model_name, } ) @@ -109,16 +115,16 @@ def test_handler_openai( [ ("gpt-35-turbo", 0.0035), ( - "gpt-35-turbo-0613", - 0.0035, + "gpt-35-turbo-0613", + 0.0035, ), ( - "gpt-35-turbo-16k-0613", - 0.007, + "gpt-35-turbo-16k-0613", + 0.007, ), ( - "gpt-35-turbo-16k", - 0.007, + "gpt-35-turbo-16k", + 0.007, ), ("gpt-4", 0.09), ("gpt-4-0613", 0.09), @@ -127,15 +133,17 @@ def test_handler_openai( ], ) def test_handler_azure_openai( - self, handler: OpenAICallbackHandler, model_name: str, expected_cost: float + self, handler: OpenAICallbackHandler, model_name: str, expected_cost: float ) -> None: response = OpenAIObject( { - "usage": OpenAIObject({ - "prompt_tokens": 1000, - "completion_tokens": 1000, - "total_tokens": 2000, - }), + "usage": OpenAIObject( + { + "prompt_tokens": 1000, + "completion_tokens": 1000, + "total_tokens": 2000, + } + ), "model": model_name, } ) @@ -150,15 +158,17 @@ def test_handler_azure_openai( ], ) def test_handler_finetuned_model( - self, handler: OpenAICallbackHandler, model_name: str, expected_cost: float + self, handler: OpenAICallbackHandler, model_name: str, expected_cost: float ): response = OpenAIObject( { - "usage": OpenAIObject({ - "prompt_tokens": 1000, - "completion_tokens": 1000, - "total_tokens": 2000, - }), + "usage": OpenAIObject( + { + "prompt_tokens": 1000, + "completion_tokens": 1000, + "total_tokens": 2000, + } + ), "model": model_name, } ) @@ -180,18 +190,20 @@ def test_openai_callback(self, mocker): } ], "model": llm.model, - "usage": OpenAIObject({ - "prompt_tokens": 2, - "completion_tokens": 1, - "total_tokens": 3, - }), + "usage": OpenAIObject( + { + "prompt_tokens": 2, + "completion_tokens": 1, + "total_tokens": 3, + } + ), } ) mocker.patch.object(llm.client, "create", return_value=llm_response) - pandas_ai = PandasAI(llm, enable_cache=False) + sdf = SmartDataframe(df, config={"llm": llm, "enable_cache": False}) with get_openai_callback() as cb: - _ = pandas_ai(df, "some question") + sdf.chat("some question 1") assert cb.total_tokens == 3 assert cb.prompt_tokens == 2 assert cb.completion_tokens == 1 @@ -200,14 +212,14 @@ def test_openai_callback(self, mocker): total_tokens = cb.total_tokens with get_openai_callback() as cb: - pandas_ai(df, "some question") - pandas_ai(df, "some question") + sdf.chat("some question 2") + sdf.chat("some question 3") assert cb.total_tokens == total_tokens * 2 with get_openai_callback() as cb: - pandas_ai(df, "some question") - pandas_ai(df, "some question") - pandas_ai(df, "some question") + sdf.chat("some question 4") + sdf.chat("some question 5") + sdf.chat("some question 6") assert cb.total_tokens == total_tokens * 3 diff --git a/tests/llms/test_huggingface_text_gen.py b/tests/llms/test_huggingface_text_gen.py index 1925b1561..140e6d882 100644 --- a/tests/llms/test_huggingface_text_gen.py +++ b/tests/llms/test_huggingface_text_gen.py @@ -1,5 +1,5 @@ """Unit tests for the LLaMa2TextGen LLM class""" -from pandasai import AbstractPrompt +from pandasai.prompts import AbstractPrompt from pandasai.llm import HuggingFaceTextGen diff --git a/tests/test_pandasai.py b/tests/test_pandasai.py deleted file mode 100644 index 0ebcb8168..000000000 --- a/tests/test_pandasai.py +++ /dev/null @@ -1,68 +0,0 @@ -import pandas as pd -from pandasai import PandasAI, SmartDatalake -from pandasai.llm.fake import FakeLLM -import pytest -from unittest.mock import patch - - -class TestPandasAI: - @pytest.fixture - def llm(self): - return FakeLLM() - - @pytest.fixture - def df(self): - return pd.DataFrame({"a": [1], "b": [4]}) - - @pytest.fixture - def pai(self, llm): - return PandasAI( - llm=llm, - enable_cache=False, - ) - - def test_init(self, pai, llm): - assert pai._config.llm == llm - assert pai._config.custom_prompts == {} - assert pai._config.custom_whitelisted_dependencies == [] - assert pai._config.enable_cache is False - assert pai._config.use_error_correction_framework is True - assert pai._config.enforce_privacy is False - assert pai._config.save_logs is True - assert pai._config.save_charts is False - assert pai._config.save_charts_path == "" - assert pai._config.verbose is False - assert pai._config.max_retries == 3 - - def test_logs(self, pai): - assert pai.logs == [] - - def test_last_prompt_id(self, pai): - assert pai.last_prompt_id is None - - def test_last_prompt(self, pai): - assert pai.last_prompt is None - - @patch.object(SmartDatalake, "chat", return_value="Answer") - def test_run(self, _mocked_method, pai, df): - assert pai.run(df, "Question") == "Answer" - - @patch.object(SmartDatalake, "chat", side_effect=Exception("Unexpected error")) - def test_run_with_exception(self, _mocked_method, pai, df): - with pytest.raises(Exception) as e_info: - pai.run(df, "Question") - assert str(e_info.value) == "Unexpected error" - - @patch.object(SmartDatalake, "chat", side_effect=Exception("Unexpected error")) - def test_call_with_exception(self, _mocked_method, pai, df): - with pytest.raises(Exception) as e_info: - pai(df, "Question") - assert str(e_info.value) == "Unexpected error" - - def test_run_with_invalid_arguments(self, pai): - with pytest.raises(ValueError) as e_info: - pai.run(0, "Question") - assert ( - str(e_info.value) - == "Invalid input data. Must be a Pandas or Polars dataframe." - )