Add super awesome feature

DatapaloozaCO · Nov 28, 2023 · 5b0d959
1 parent 41b6c6f
commit 5b0d959
Show file tree

Hide file tree

Showing 25 changed files with 300 additions and 206 deletions.
diff --git a/.gitignore b/.gitignore
@@ -159,4 +159,4 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 playground/
-outputs/
+outputs/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,11 @@
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v2.3.0
+    hooks:
+    -   id: check-yaml
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+-   repo: https://github.com/psf/black
+    rev: 22.10.0
+    hooks:
+    -   id: black
diff --git a/LICENSE.md b/LICENSE.md
@@ -17,4 +17,4 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/palooza_wizard/__init__.py b/palooza_wizard/__init__.py
@@ -2,4 +2,4 @@
 import palooza_wizard.constants as constants
 
 for folder in constants.FOLDERS:
-    files.create_folder_if_not_exists(folder)
+    files.create_folder_if_not_exists(folder)
diff --git a/palooza_wizard/agent.py b/palooza_wizard/agent.py
@@ -2,10 +2,11 @@
 import palooza_wizard.chatgpt as pwc
 import palooza_wizard.constants as ct
 from typing import List
-import os 
+import os
+
 
 def get_agent_code(file_name: str):
-    python_code = load_code_string(file_name = file_name)
+    python_code = load_code_string(file_name=file_name)
     system_message = pwc.get_system_message_for_agent()
     messages = [
         {"role": "system", "content": system_message},
@@ -16,25 +17,33 @@ def get_agent_code(file_name: str):
     with open(f"./{file_name}", "w") as f:
         f.write(agent_code)
 
+
 def get_element_metadata(task: dict) -> tuple:
     data = task["element"]
     tag, attribute, value = data["tag"], data["attribute"], data["value"]
     return tag, attribute, value
 
+
 def get_agent_function(file_path: str) -> str:
-    with open(f'{ct.IMPORTANCE_OUTPUT_FOLDER}/{file_path}', "r", encoding='windows-1252') as f:
+    with open(
+        f"{ct.IMPORTANCE_OUTPUT_FOLDER}/{file_path}", "r", encoding="windows-1252"
+    ) as f:
         user_message = f.read()
     function_name = file_path
-    messages = pwc.get_messages_for_function(user_message = user_message, function_name = function_name)
-    completion = pwc.get_completion_from_messages(messages = messages)
+    messages = pwc.get_messages_for_function(
+        user_message=user_message, function_name=function_name
+    )
+    completion = pwc.get_completion_from_messages(messages=messages)
     return completion
 
+
 def save_completion(completion: str, file_name: str) -> None:
     file_name = file_name.replace(".html", ".py")
     with open(f"{ct.AGENT_OUTPUT_FOLDER}/{file_name}", "a") as f:
         f.write(completion)
         f.write("\n\n")
 
+
 def get_agent_functions() -> None:
     """Get agent functions
 
@@ -45,23 +54,24 @@ def get_agent_functions() -> None:
     Return:
         - None
     """
-    #try:
+    # try:
     #    #os.remove(f"{ct.FUNCTIONS_OUTPUT_FOLDER}/{file_name}")
     #    os.remove(file_name)
-    #except:
+    # except:
     #    pass
-    
+
     file_paths = os.listdir(ct.IMPORTANCE_OUTPUT_FOLDER)
-    #file_paths = [file_paths[index] for index in indexes]
+    # file_paths = [file_paths[index] for index in indexes]
 
     for file_path in file_paths:
         completion = get_agent_function(file_path)
-        completion = pwc.format_python_completion(completion = completion)
-        save_completion(completion = completion, file_name = file_path)
+        completion = pwc.format_python_completion(completion=completion)
+        save_completion(completion=completion, file_name=file_path)
+
 
 def load_code_string(file_name: str):
-    #{ct.FUNCTIONS_OUTPUT_FOLDER}
+    # {ct.FUNCTIONS_OUTPUT_FOLDER}
     with open(f"./{file_name}", "r") as f:
-         python_code = f.read()
-         print(python_code)
+        python_code = f.read()
+        print(python_code)
     return python_code
diff --git a/palooza_wizard/algorithms/__init__.py b/palooza_wizard/algorithms/__init__.py
@@ -1 +1 @@
-from .degree_importance import degree_importance
+from .degree_importance import degree_importance
diff --git a/palooza_wizard/algorithms/degree_importance.py b/palooza_wizard/algorithms/degree_importance.py
@@ -2,71 +2,67 @@
 from typing import List
 import palooza_wizard.constants as ct
 from bs4 import BeautifulSoup
-import os 
-import joblib 
-import sys 
-
-def filter_candidates_by_containment(
-        graph: nx.DiGraph, 
-        candidates: List[int]
-    ):
-    """Para todo g1, g2 e I, g1 no contiene a g2 ni g2 a g1
-    """
+import os
+import joblib
+import sys
+
+
+def filter_candidates_by_containment(graph: nx.DiGraph, candidates: List[int]):
+    """Para todo g1, g2 e I, g1 no contiene a g2 ni g2 a g1"""
     inadmissable_nodes = []
     for candidate in candidates:
         if candidate in inadmissable_nodes:
-          continue
+            continue
         descendants = list(nx.descendants(graph, candidate))
         inadmissable_nodes = inadmissable_nodes + descendants
         yield candidate
 
+
 def filter_candidates_by_depth(
-        graph: nx.DiGraph, 
-        root,
-        candidates: List[int], 
-        min_depth: int
-    ):
-    nodes_depth = nx.shortest_path_length(graph, root) 
-    for candidate in candidates: 
+    graph: nx.DiGraph, root, candidates: List[int], min_depth: int
+):
+    nodes_depth = nx.shortest_path_length(graph, root)
+    for candidate in candidates:
         if nodes_depth[candidate] > min_depth:
-          yield candidate
+            yield candidate
+
 
 def filter_candidates(
-        graph: nx.DiGraph, 
-        root: str,
-        candidates: List[int], 
-        min_depth: int = 3
-    ):
-    candidates = [x for x in filter_candidates_by_depth(graph, root, candidates, min_depth)]
+    graph: nx.DiGraph, root: str, candidates: List[int], min_depth: int = 3
+):
+    candidates = [
+        x for x in filter_candidates_by_depth(graph, root, candidates, min_depth)
+    ]
     candidates = [x for x in filter_candidates_by_containment(graph, candidates)]
-    return candidates 
+    return candidates
+
 
 def degree_importance(
-        graph: nx.DiGraph, 
-        root: str, 
-        min_depth: int, 
-        max_candidates: int, 
-        verbose: bool = False
-    ) -> List[int]:
-
-    # Get degree of nodes. 
+    graph: nx.DiGraph,
+    root: str,
+    min_depth: int,
+    max_candidates: int,
+    verbose: bool = False,
+) -> List[int]:
+
+    # Get degree of nodes.
     nodes_degree = list(graph.degree(graph.nodes()))
 
     # Sort nodes by degree.
-    nodes_degree.sort(key = lambda z: z[1], reverse=True)
+    nodes_degree.sort(key=lambda z: z[1], reverse=True)
 
     nodes_degree = nodes_degree[:max_candidates]
 
-    #for key, value in nodes_degree:
+    # for key, value in nodes_degree:
     #    print(f"{key}: {value}")
 
     # Get candidates of nodes.
     candidates = [x[0] for x in nodes_degree]
 
     # Filter candidates.
-    candidates = filter_candidates(graph, root, candidates, min_depth = min_depth)
+    candidates = filter_candidates(graph, root, candidates, min_depth=min_depth)
 
-    if verbose: 
+    if verbose:
         print("Candidates")
         print(candidates)
 

diff --git a/palooza_wizard/chatgpt/__init__.py b/palooza_wizard/chatgpt/__init__.py
@@ -1,3 +1,3 @@
 from .pricing import *
 from .tokens import *
-from .chatgpt import *
+from .chatgpt import *
diff --git a/palooza_wizard/chatgpt/chatgpt.py b/palooza_wizard/chatgpt/chatgpt.py
@@ -1,5 +1,5 @@
 import openai
-from typing import List 
+from typing import List
 from dotenv import dotenv_values
 import palooza_wizard.constants as ct
 import palooza_wizard.chatgpt as chatgpt
@@ -8,34 +8,40 @@
 
 openai.api_key = ct.OPEN_AI_API_KEY
 
-def get_completion_from_messages(messages, model="gpt-4", temperature=0, max_tokens=500): # gpt-4
+
+def get_completion_from_messages(
+    messages, model="gpt-4", temperature=0, max_tokens=500
+):  # gpt-4
     response = openai.ChatCompletion.create(
         model=model,
         messages=messages,
-        temperature=temperature, 
-        max_tokens=max_tokens, 
+        temperature=temperature,
+        max_tokens=max_tokens,
     )
     return response.choices[0].message["content"]
 
+
 def get_system_message_for_function(function_name: str) -> str:
     system_message = f"""
-    Create a Python function with a proper name starting with '{function_name}' that extracts all relevant data from 
-    the provided HTML code by the user and returns the data as a dictionary. The keys of the dictionaries should be as 
-    few as possible. Utilize Beautiful Soup (beautifulsoup4) to implement this Python function. The output should 
+    Create a Python function with a proper name starting with '{function_name}' that extracts all relevant data from
+    the provided HTML code by the user and returns the data as a dictionary. The keys of the dictionaries should be as
+    few as possible. Utilize Beautiful Soup (beautifulsoup4) to implement this Python function. The output should
     consist solely of Python code without any English words.
     """
     return system_message
 
+
 def get_system_message_for_agent() -> str:
     system_message = f"""
-    Create a python class called 'agent' which implements as methods the defined functions passed by the user in the prompt. 
+    Create a python class called 'agent' which implements as methods the defined functions passed by the user in the prompt.
     In addition, include a new method called 'extract_data' that accepts as argument a beautifulsoup object and a url and which uses all the methods of the class and returns a dictionary with the result
     of each method. Constructor method must not recieve any parameter, instead, each method within the class must accept as parameter a BeautifulSoup object
     """
     return system_message
 
+
 def get_messages_for_function(user_message: str, function_name: str) -> List[str]:
-    system_message = get_system_message_for_function(function_name) 
+    system_message = get_system_message_for_function(function_name)
     num_tokens = chatgpt.num_tokens_for_model(system_message + user_message)
     print("Number of tokens to be sent: ", num_tokens)
     messages = [
@@ -44,8 +50,11 @@ def get_messages_for_function(user_message: str, function_name: str) -> List[str
     ]
     return messages
 
+
 def format_python_completion(completion: str) -> str:
     if completion.find("```python") != -1:
-        a, b = completion.find("```python"), completion.find("```", len("```python") + 1, len(completion))
-        return completion[a + len("```python"): b]
+        a, b = completion.find("```python"), completion.find(
+            "```", len("```python") + 1, len(completion)
+        )
+        return completion[a + len("```python") : b]
     return completion
diff --git a/palooza_wizard/chatgpt/constants.py b/palooza_wizard/chatgpt/constants.py
@@ -3,18 +3,18 @@
         "training": 0.0004,
         "input_usage": 0.0016,
         "output_usage": 0.0016,
-        "num_tokens": 1000
-    }, 
+        "num_tokens": 1000,
+    },
     "davinci-002": {
         "training": 0.0060,
         "input_usage": 0.0120,
         "output_usage": 0.0120,
-        "num_tokens": 1000
+        "num_tokens": 1000,
     },
     "GPT-3.5-Turbo": {
         "training": 0.0080,
         "input_usage": 0.0120,
         "output_usage": 0.0160,
-        "num_tokens": 1000
-    }
-}
+        "num_tokens": 1000,
+    },
+}
diff --git a/palooza_wizard/chatgpt/pricing.py b/palooza_wizard/chatgpt/pricing.py
@@ -1,27 +1,31 @@
 import palooza_wizard.chatgpt.constants as ct
-from typing import List 
+from typing import List
+
 
 def get_available_models() -> List[str]:
     return list(ct.PRICING.keys())
 
+
 def validate_model(model: str) -> bool:
     return model in ct.PRICING.keys()
 
+
 def estimated_training_cost(num_tokens: int, model: str = "GPT-3.5-Turbo") -> float:
     assert validate_model(model), "Invalid model"
     num_tokens_per_cost = ct.PRICING[model]["num_tokens"]
     training_cost = ct.PRICING[model]["training"]
     return (num_tokens / num_tokens_per_cost) * training_cost
 
+
 def estimated_input_usage_cost(num_tokens: int, model: str = "GPT-3.5-Turbo") -> float:
     assert validate_model(model), "Invalid model"
     num_tokens_per_cost = ct.PRICING[model]["num_tokens"]
     input_cost = ct.PRICING[model]["input_usage"]
     return (num_tokens / num_tokens_per_cost) * input_cost
 
-def estimated_output_usage_cost(num_tokens: int,  model: str = "GPT-3.5-Turbo") -> float:
+
+def estimated_output_usage_cost(num_tokens: int, model: str = "GPT-3.5-Turbo") -> float:
     assert validate_model(model), "Invalid model"
     num_tokens_per_cost = ct.PRICING[model]["num_tokens"]
     output_cost = ct.PRICING[model]["output_usage"]
     return (num_tokens / num_tokens_per_cost) * output_cost
-
diff --git a/palooza_wizard/chatgpt/tokens.py b/palooza_wizard/chatgpt/tokens.py
@@ -1,5 +1,6 @@
 import tiktoken
 
+
 def num_tokens_with_encoding(string: str, encoding_name: str = "cl100k_base") -> int:
     """This function computes the number of tokens in a string
     #https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
@@ -8,10 +9,11 @@ def num_tokens_with_encoding(string: str, encoding_name: str = "cl100k_base") ->
     num_tokens = len(encoding.encode(string))
     return num_tokens
 
+
 def num_tokens_for_model(string: str, model_name: str = "gpt-4") -> int:
     """This function computers the number of token in a string for a specific model name
     #https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
     """
     encoding = tiktoken.encoding_for_model(model_name)
     num_tokens = len(encoding.encode(string))
-    return num_tokens
+    return num_tokens
Original file line number	Diff line number	Diff line change
@@ -1 +1 @@
-from .degree_importance import degree_importance
+from .degree_importance import degree_importance