refactor: clean code in smart_dataframe and smart_datalake #814

Merged · 22 commits · Jan 11, 2024

Commits (the diff below shows changes from 10 of them):
433ad99  refactor: extract import from file method (gventuri, Nov 29, 2023)
ff945c8  refactor: extract df head methods (gventuri, Dec 2, 2023)
f9ccbba  refactor: move connector config in the relative connector file (gventuri, Dec 6, 2023)
03634bb  refactor: csv and pandas files are now treated as a connector (gventuri, Dec 8, 2023)
caebf2f  chore: remove verbose getters and setters (gventuri, Dec 8, 2023)
7b343fb  refactor: remove load and save feature (gventuri, Dec 9, 2023)
d899b66  refactor: create dataframe proxy (gventuri, Dec 11, 2023)
45097ae  chore: simplify agent (gventuri, Dec 11, 2023)
935f8ce  chore: simplify datalake (gventuri, Dec 12, 2023)
945b0fe  refactor: simplify smart datalake (gventuri, Dec 13, 2023)
8353a41  refactor: centralize context in lakes (gventuri, Dec 17, 2023)
e497d88  refactor: move lake callbacks to dedicate class (gventuri, Dec 17, 2023)
181689c  fix: load connector before generating cache hex (gventuri, Dec 18, 2023)
e4b2154  fix: only allow direct sql to SQLConnectors (gventuri, Dec 21, 2023)
480d7bb  fix: check sql connector was not working (gventuri, Jan 2, 2024)
38886ce  Merge branch 'main' into refactor/clean_code (gventuri, Jan 2, 2024)
fa74b98  fix(connector): update connector validation at the start (ArslanSaleem, Jan 3, 2024)
b6acc29  fix(direct_sql): fix some leftovers (ArslanSaleem, Jan 4, 2024)
ce1e848  Merge branch 'main' into refactor/clean_code (ArslanSaleem, Jan 4, 2024)
c60eb98  fix: merged change revert built-in shadowing (ArslanSaleem, Jan 4, 2024)
e043039  Merge branch 'release/v1.6' into refactor/clean_code (gventuri, Jan 11, 2024)
8b19c8d  Merge branch 'refactor/clean_code' of https://github.com/gventuri/pan… (gventuri, Jan 11, 2024)
90 changes: 43 additions & 47 deletions pandasai/agent/__init__.py
@@ -1,8 +1,7 @@
 import json
 from typing import Union, List, Optional
 
+import pandas as pd
 from pandasai.skills import skill
-from ..helpers.df_info import DataFrameType
 from ..helpers.logger import Logger
 from ..helpers.memory import Memory
 from ..prompts.base import AbstractPrompt
@@ -21,58 +20,55 @@ class Agent:
     Agent class to improve the conversational experience in PandasAI
     """
 
-    _lake: SmartDatalake = None
-    _logger: Optional[Logger] = None
-
     def __init__(
         self,
-        dfs: Union[DataFrameType, List[DataFrameType]],
+        dfs: Union[pd.DataFrame, List[pd.DataFrame]],
         config: Optional[Union[Config, dict]] = None,
         logger: Optional[Logger] = None,
         memory_size: int = 10,
     ):
         """
         Args:
-            df (Union[DataFrameType, List[DataFrameType]]): DataFrame can be Pandas,
-            Polars or Database connectors
+            df (Union[pd.DataFrame, List[pd.DataFrame]]): Pandas dataframe
             memory_size (int, optional): Conversation history to use during chat.
             Defaults to 1.
         """
 
         # Get a list of dataframes, if only one dataframe is passed
         if not isinstance(dfs, list):
             dfs = [dfs]
 
-        self._lake = SmartDatalake(dfs, config, logger, memory=Memory(memory_size))
-
-        # set instance type in SmartDataLake
-        self._lake.set_instance_type(self.__class__.__name__)
+        # Configure the smart datalake
+        self.lake = SmartDatalake(dfs, config, logger, memory=Memory(memory_size))
+        self.lake.set_instance_type(self.__class__.__name__)
 
-        self._logger = self._lake.logger
+        self.logger = self.lake.logger
 
     def add_skills(self, *skills: List[skill]):
         """
         Add Skills to PandasAI
         """
-        self._lake.add_skills(*skills)
+        self.lake.add_skills(*skills)
 
-    def _call_llm_with_prompt(self, prompt: AbstractPrompt):
+    def call_llm_with_prompt(self, prompt: AbstractPrompt):
         """
         Call LLM with prompt using error handling to retry based on config
         Args:
             prompt (AbstractPrompt): AbstractPrompt to pass to LLM's
         """
         retry_count = 0
-        while retry_count < self._lake.config.max_retries:
+        while retry_count < self.lake.config.max_retries:
             try:
-                result: str = self._lake.llm.call(prompt)
+                result: str = self.lake.llm.call(prompt)
                 if prompt.validate(result):
                     return result
                 else:
                     raise Exception("Response validation failed!")
             except Exception:
                 if (
-                    not self._lake.use_error_correction_framework
-                    or retry_count >= self._lake.config.max_retries - 1
+                    not self.lake.config.use_error_correction_framework
+                    or retry_count >= self.lake.config.max_retries - 1
                 ):
                     raise
                 retry_count += 1
Expand All @@ -83,8 +79,8 @@ def chat(self, query: str, output_type: Optional[str] = None):
"""
try:
is_related = self.check_if_related_to_conversation(query)
self._lake.is_related_query(is_related)
return self._lake.chat(query, output_type=output_type)
self.lake.is_related_query(is_related)
return self.lake.chat(query, output_type=output_type)
except Exception as exception:
return (
"Unfortunately, I was not able to get your answers, "
Expand All @@ -98,44 +94,44 @@ def add_message(self, message, is_user=False):
to the memory without calling the chat function (for example, when you
need to add a message from the agent).
"""
self._lake._memory.add(message, is_user=is_user)
self.lake.memory.add(message, is_user=is_user)

def check_if_related_to_conversation(self, query: str) -> bool:
"""
Check if the query is related to the previous conversation
"""
if self._lake._memory.count() == 0:
if self.lake.memory.count() == 0:
return

prompt = CheckIfRelevantToConversationPrompt(
conversation=self._lake._memory.get_conversation(),
conversation=self.lake.memory.get_conversation(),
query=query,
)

result = self._call_llm_with_prompt(prompt)
result = self.call_llm_with_prompt(prompt)

related = "true" in result
self._logger.log(
f"""Check if the new message is related to the conversation: {related}"""
is_related = "true" in result
self.logger.log(
f"""Check if the new message is related to the conversation: {is_related}"""
)

if not related:
self._lake.clear_memory()
if not is_related:
self.lake.clear_memory()

return related
return is_related

def clarification_questions(self, query: str) -> List[str]:
"""
Generate clarification questions based on the data
"""
prompt = ClarificationQuestionPrompt(
dataframes=self._lake.dfs,
conversation=self._lake._memory.get_conversation(),
dataframes=self.lake.dfs,
conversation=self.lake.memory.get_conversation(),
query=query,
)

result = self._call_llm_with_prompt(prompt)
self._logger.log(
result = self.call_llm_with_prompt(prompt)
self.logger.log(
f"""Clarification Questions: {result}
"""
)
Expand All @@ -146,19 +142,19 @@ def start_new_conversation(self):
"""
Clears the previous conversation
"""
self._lake.clear_memory()
self.lake.clear_memory()

def explain(self) -> str:
"""
Returns the explanation of the code how it reached to the solution
"""
try:
prompt = ExplainPrompt(
conversation=self._lake._memory.get_conversation(),
code=self._lake.last_code_executed,
conversation=self.lake.memory.get_conversation(),
code=self.lake.last_code_executed,
)
response = self._call_llm_with_prompt(prompt)
self._logger.log(
response = self.call_llm_with_prompt(prompt)
self.logger.log(
f"""Explanation: {response}
"""
)
Expand All @@ -174,11 +170,11 @@ def rephrase_query(self, query: str):
try:
prompt = RephraseQueryPrompt(
query=query,
dataframes=self._lake.dfs,
conversation=self._lake._memory.get_conversation(),
dataframes=self.lake.dfs,
conversation=self.lake.memory.get_conversation(),
)
response = self._call_llm_with_prompt(prompt)
self._logger.log(
response = self.call_llm_with_prompt(prompt)
self.logger.log(
f"""Rephrased Response: {response}
"""
)
Expand All @@ -192,16 +188,16 @@ def rephrase_query(self, query: str):

@property
def last_code_generated(self):
return self._lake.last_code_generated
return self.lake.last_code_generated

@property
def last_code_executed(self):
return self._lake.last_code_executed
return self.lake.last_code_executed

@property
def last_prompt(self):
return self._lake.last_prompt
return self.lake.last_prompt

@property
def last_query_log_id(self):
return self._lake.last_query_log_id
return self.lake.last_query_log_id
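
For reference, a minimal sketch of the Agent surface after this refactor: `lake`, `logger`, and `call_llm_with_prompt` are now public, and memory is reached as `agent.lake.memory` instead of `_lake._memory`. The sample dataframe and question are illustrative, and a working LLM config is assumed.

```python
import pandas as pd
from pandasai.agent import Agent

# Illustrative data; any pandas dataframe (or a list of them) works.
sales = pd.DataFrame({
    "country": ["US", "UK", "FR"],
    "revenue": [5000, 3200, 2900],
})

# A single dataframe is wrapped into a list by __init__.
agent = Agent(sales, memory_size=10)

# `lake` and `logger` are plain public attributes after this PR.
response = agent.chat("Which country has the highest revenue?")
print(response)

# Memory is reachable without the private `_memory` indirection:
agent.add_message("Noted, thanks!", is_user=True)
agent.start_new_conversation()
```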
4 changes: 4 additions & 0 deletions pandasai/connectors/__init__.py
@@ -11,6 +11,8 @@
 from .yahoo_finance import YahooFinanceConnector
 from .airtable import AirtableConnector
 from .sql import SqliteConnector
+from .pandas import PandasConnector
+from .polars import PolarsConnector
 
 __all__ = [
     "BaseConnector",
@@ -22,4 +24,6 @@
     "DatabricksConnector",
     "AirtableConnector",
     "SqliteConnector",
+    "PandasConnector",
+    "PolarsConnector",
 ]
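
With `PandasConnector` and `PolarsConnector` exported, raw dataframes go through the same connector interface as SQL sources (per the commit "csv and pandas files are now treated as a connector"). A sketch; the constructor shape below (`{"original_df": ...}`) is not shown in this diff and is an assumption:

```python
import pandas as pd
from pandasai.connectors import PandasConnector

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

# Assumed config shape: a dict carrying the wrapped dataframe.
connector = PandasConnector({"original_df": df})

# Connectors expose head()/execute() like the SQL connectors in this PR.
print(connector.head())
```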
20 changes: 15 additions & 5 deletions pandasai/connectors/airtable.py
@@ -2,7 +2,7 @@
 Airtable connectors are used to connect airtable records.
 """
 
-from .base import AirtableConnectorConfig, BaseConnector, BaseConnectorConfig
+from .base import BaseConnector, BaseConnectorConfig
 from typing import Union, Optional
 import requests
 import pandas as pd
@@ -14,6 +14,16 @@
 from functools import cache, cached_property
 
 
+class AirtableConnectorConfig(BaseConnectorConfig):
+    """
+    Connecter configuration for Airtable data.
+    """
+
+    api_key: str
+    base_id: str
+    database: str = "airtable_data"
+
+
 class AirtableConnector(BaseConnector):
     """
     Airtable connector to retrieving record data.
@@ -136,12 +146,12 @@ def fallback_name(self):
         """
         return self._config.table
 
-    def execute(self):
+    def execute(self) -> pd.DataFrame:
         """
         Execute the connector and return the result.
 
         Returns:
-            DataFrameType: The result of the connector.
+            pd.DataFrame: The result of the connector.
         """
         if cached := self._cached() or self._cached(include_additional_filters=True):
             return pd.read_parquet(cached)
@@ -206,7 +216,7 @@ def _fetch_data(self):
         return pd.DataFrame(data)
 
     @cache
-    def head(self):
+    def head(self, n: int = 5) -> pd.DataFrame:
         """
         Return the head of the table that
         the connector is connected to.
@@ -215,7 +225,7 @@ def head(self):
             DatFrameType: The head of the data source
             that the connector is connected to .
         """
-        data = self._request_api(params={"maxRecords": 5})
+        data = self._request_api(params={"maxRecords": n})
         return pd.DataFrame(
             [
                 {"id": record["id"], **record["fields"]}

Review comment (Contributor), on the head docstring: The comment for the head method's return type should be updated to pd.DataFrame to reflect the actual return type.
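
A sketch of the relocated AirtableConnectorConfig in use. Credential values are placeholders; `table` comes from `BaseConnectorConfig` (the diff reads `self._config.table`), and passing a plain dict for the config is an assumption:

```python
from pandasai.connectors import AirtableConnector

connector = AirtableConnector(config={
    "api_key": "keyXXXXXXXXXXXXXX",  # placeholder Airtable API key
    "base_id": "appXXXXXXXXXXXXXX",  # placeholder base id
    "table": "tasks",                # inherited field; used as fallback_name
})

preview = connector.head(n=5)  # head() now takes an explicit row count
full = connector.execute()     # returns a pd.DataFrame per the new annotation
```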