fix

DatapaloozaCO · Nov 28, 2023 · 4a60938 · 4a60938
1 parent 5b0d959
commit 4a60938
Show file tree

Hide file tree

Showing 13 changed files with 122 additions and 37 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -9,3 +9,5 @@ repos:
     rev: 22.10.0
     hooks:
     -   id: black
+        language_version: python3.11
+        args: [--line-length=70]
diff --git a/palooza_wizard/agent.py b/palooza_wizard/agent.py
@@ -20,13 +20,19 @@ def get_agent_code(file_name: str):
 
 def get_element_metadata(task: dict) -> tuple:
     data = task["element"]
-    tag, attribute, value = data["tag"], data["attribute"], data["value"]
+    tag, attribute, value = (
+        data["tag"],
+        data["attribute"],
+        data["value"],
+    )
     return tag, attribute, value
 
 
 def get_agent_function(file_path: str) -> str:
     with open(
-        f"{ct.IMPORTANCE_OUTPUT_FOLDER}/{file_path}", "r", encoding="windows-1252"
+        f"{ct.IMPORTANCE_OUTPUT_FOLDER}/{file_path}",
+        "r",
+        encoding="windows-1252",
     ) as f:
         user_message = f.read()
     function_name = file_path
@@ -65,7 +71,9 @@ def get_agent_functions() -> None:
 
     for file_path in file_paths:
         completion = get_agent_function(file_path)
-        completion = pwc.format_python_completion(completion=completion)
+        completion = pwc.format_python_completion(
+            completion=completion
+        )
         save_completion(completion=completion, file_name=file_path)
 
 

diff --git a/palooza_wizard/algorithms/degree_importance.py b/palooza_wizard/algorithms/degree_importance.py
@@ -7,7 +7,9 @@
 import sys
 
 
-def filter_candidates_by_containment(graph: nx.DiGraph, candidates: List[int]):
+def filter_candidates_by_containment(
+    graph: nx.DiGraph, candidates: List[int]
+):
     """Para todo g1, g2 e I, g1 no contiene a g2 ni g2 a g1"""
     inadmissable_nodes = []
     for candidate in candidates:
@@ -28,12 +30,20 @@ def filter_candidates_by_depth(
 
 
 def filter_candidates(
-    graph: nx.DiGraph, root: str, candidates: List[int], min_depth: int = 3
+    graph: nx.DiGraph,
+    root: str,
+    candidates: List[int],
+    min_depth: int = 3,
 ):
     candidates = [
-        x for x in filter_candidates_by_depth(graph, root, candidates, min_depth)
+        x
+        for x in filter_candidates_by_depth(
+            graph, root, candidates, min_depth
+        )
+    ]
+    candidates = [
+        x for x in filter_candidates_by_containment(graph, candidates)
     ]
-    candidates = [x for x in filter_candidates_by_containment(graph, candidates)]
     return candidates
 
 
@@ -60,7 +70,9 @@ def degree_importance(
     candidates = [x[0] for x in nodes_degree]
 
     # Filter candidates.
-    candidates = filter_candidates(graph, root, candidates, min_depth=min_depth)
+    candidates = filter_candidates(
+        graph, root, candidates, min_depth=min_depth
+    )
 
     if verbose:
         print("Candidates")

diff --git a/palooza_wizard/chatgpt/chatgpt.py b/palooza_wizard/chatgpt/chatgpt.py
@@ -40,9 +40,13 @@ def get_system_message_for_agent() -> str:
     return system_message
 
 
-def get_messages_for_function(user_message: str, function_name: str) -> List[str]:
+def get_messages_for_function(
+    user_message: str, function_name: str
+) -> List[str]:
     system_message = get_system_message_for_function(function_name)
-    num_tokens = chatgpt.num_tokens_for_model(system_message + user_message)
+    num_tokens = chatgpt.num_tokens_for_model(
+        system_message + user_message
+    )
     print("Number of tokens to be sent: ", num_tokens)
     messages = [
         {"role": "system", "content": system_message},

diff --git a/palooza_wizard/chatgpt/pricing.py b/palooza_wizard/chatgpt/pricing.py
@@ -10,21 +10,27 @@ def validate_model(model: str) -> bool:
     return model in ct.PRICING.keys()
 
 
-def estimated_training_cost(num_tokens: int, model: str = "GPT-3.5-Turbo") -> float:
+def estimated_training_cost(
+    num_tokens: int, model: str = "GPT-3.5-Turbo"
+) -> float:
     assert validate_model(model), "Invalid model"
     num_tokens_per_cost = ct.PRICING[model]["num_tokens"]
     training_cost = ct.PRICING[model]["training"]
     return (num_tokens / num_tokens_per_cost) * training_cost
 
 
-def estimated_input_usage_cost(num_tokens: int, model: str = "GPT-3.5-Turbo") -> float:
+def estimated_input_usage_cost(
+    num_tokens: int, model: str = "GPT-3.5-Turbo"
+) -> float:
     assert validate_model(model), "Invalid model"
     num_tokens_per_cost = ct.PRICING[model]["num_tokens"]
     input_cost = ct.PRICING[model]["input_usage"]
     return (num_tokens / num_tokens_per_cost) * input_cost
 
 
-def estimated_output_usage_cost(num_tokens: int, model: str = "GPT-3.5-Turbo") -> float:
+def estimated_output_usage_cost(
+    num_tokens: int, model: str = "GPT-3.5-Turbo"
+) -> float:
     assert validate_model(model), "Invalid model"
     num_tokens_per_cost = ct.PRICING[model]["num_tokens"]
     output_cost = ct.PRICING[model]["output_usage"]

diff --git a/palooza_wizard/chatgpt/tokens.py b/palooza_wizard/chatgpt/tokens.py
@@ -1,7 +1,9 @@
 import tiktoken
 
 
-def num_tokens_with_encoding(string: str, encoding_name: str = "cl100k_base") -> int:
+def num_tokens_with_encoding(
+    string: str, encoding_name: str = "cl100k_base"
+) -> int:
     """This function computes the number of tokens in a string
     #https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
     """
@@ -10,7 +12,9 @@ def num_tokens_with_encoding(string: str, encoding_name: str = "cl100k_base") ->
     return num_tokens
 
 
-def num_tokens_for_model(string: str, model_name: str = "gpt-4") -> int:
+def num_tokens_for_model(
+    string: str, model_name: str = "gpt-4"
+) -> int:
     """This function computers the number of token in a string for a specific model name
     #https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
     """

diff --git a/palooza_wizard/graph.py b/palooza_wizard/graph.py
@@ -37,7 +37,9 @@ def get_color(self, soup: BeautifulSoup) -> None:
             self.sizes.append(1)
 
     # Get node name
-    def get_node_name(self, soup: BeautifulSoup, parent_name: str, index: int) -> None:
+    def get_node_name(
+        self, soup: BeautifulSoup, parent_name: str, index: int
+    ) -> None:
         self.get_color(soup)
         node_name = f"{parent_name}__{index}__{str(soup.name)}__*"
         return node_name
@@ -63,7 +65,11 @@ def get_node_properties(
 
     # Add nodes to the graph
     def add_nodes(
-        self, soup: BeautifulSoup, parent_name: str = "", index: int = 1, depth: int = 0
+        self,
+        soup: BeautifulSoup,
+        parent_name: str = "",
+        index: int = 1,
+        depth: int = 0,
     ):
 
         if soup is None:
@@ -75,7 +81,9 @@ def add_nodes(
             node_name = self.get_node_name(soup, parent_name, index)
 
             # Add node to the graph.
-            properties = self.get_node_properties(soup, parent_name, node_name)
+            properties = self.get_node_properties(
+                soup, parent_name, node_name
+            )
             self.G.add_node(node_name, **properties)
             self.counter += 1
 
@@ -94,9 +102,13 @@ def add_nodes(
             # Add children only if there is more than 1 children
             # if len(children) > 1:
             for i in range(len(children)):
-                self.add_nodes(children[i], node_name, i + 1, depth + 1)
+                self.add_nodes(
+                    children[i], node_name, i + 1, depth + 1
+                )
 
-    def get_graph(self, soup: BeautifulSoup, labels_to_integers: bool = True):
+    def get_graph(
+        self, soup: BeautifulSoup, labels_to_integers: bool = True
+    ):
         self.add_nodes(soup, ct.ROOT_LABEL)
         if labels_to_integers:
             self.G = nx.convert_node_labels_to_integers(self.G)

diff --git a/palooza_wizard/utils/files.py b/palooza_wizard/utils/files.py
@@ -7,7 +7,9 @@
 from typing import List
 
 
-def get_files_in_folder(folder_path: str, full_path: bool = True) -> List[str]:
+def get_files_in_folder(
+    folder_path: str, full_path: bool = True
+) -> List[str]:
     if not file_exists(folder_path):
         raise Exception("Folder not found")
     files = os.listdir(folder_path)
@@ -68,7 +70,9 @@ def get_html_from_url(url: str, use_proxies: bool = True) -> str:
     return data.content
 
 
-def get_soup_from_url(url: str, use_proxies: bool = True) -> BeautifulSoup:
+def get_soup_from_url(
+    url: str, use_proxies: bool = True
+) -> BeautifulSoup:
     """Download HTML file and return a BeautifulSoup object using get_html_from_url function"""
     soup = get_html_from_url(url, use_proxies=use_proxies)
     soup = BeautifulSoup(soup, "html.parser")

diff --git a/palooza_wizard_cli.py b/palooza_wizard_cli.py
@@ -4,7 +4,9 @@
 try:
     from palooza_wizard import DatapaloozaWizard
 except ImportError as e:
-    msg = '"{}"\nPlease install `palooza_wizard` to resolve this error.'
+    msg = (
+        '"{}"\nPlease install `palooza_wizard` to resolve this error.'
+    )
     raise ImportError(msg.format(str(e)))
 
 
@@ -16,24 +18,34 @@ def main():
         help="search engine(s) - " + ", ".join(search_engines_dict),
         default="google",
     )
-    ap.add_argument("-o", help="output file [html, csv, json]", default="print")
     ap.add_argument(
-        "-n", help="filename for output file", default=config.OUTPUT_DIR + "output"
+        "-o", help="output file [html, csv, json]", default="print"
+    )
+    ap.add_argument(
+        "-n",
+        help="filename for output file",
+        default=config.OUTPUT_DIR + "output",
     )
     ap.add_argument(
         "-p",
         help="number of pages",
         default=config.SEARCH_ENGINE_RESULTS_PAGES,
         type=int,
     )
-    ap.add_argument("-f", help="filter results [url, title, text, host]", default=None)
+    ap.add_argument(
+        "-f",
+        help="filter results [url, title, text, host]",
+        default=None,
+    )
     ap.add_argument(
         "-i",
         help="ignore duplicats, useful when multiple search engines are used",
         action="store_true",
     )
     ap.add_argument(
-        "-proxy", help="use proxy (protocol://ip:port)", default=config.PROXY
+        "-proxy",
+        help="use proxy (protocol://ip:port)",
+        default=config.PROXY,
     )
 
     args = ap.parse_args()
@@ -47,7 +59,10 @@ def main():
     ]
 
     if not engines:
-        print("Please choose a search engine: " + ", ".join(search_engines_dict))
+        print(
+            "Please choose a search engine: "
+            + ", ".join(search_engines_dict)
+        )
     else:
         if "all" in engines:
             engine = AllSearchEngines(proxy, timeout)

diff --git a/setup.py b/setup.py
@@ -4,7 +4,9 @@
 
 here = os.path.abspath(os.path.dirname(__file__))
 
-with codecs.open(os.path.join(here, "README.md"), encoding="utf-8") as fh:
+with codecs.open(
+    os.path.join(here, "README.md"), encoding="utf-8"
+) as fh:
     long_description = "\\n" + fh.read()
 
 requirements = []

diff --git a/streamlit/01_🧙‍♂️_Palooza_Wizard.py b/streamlit/01_🧙‍♂️_Palooza_Wizard.py
@@ -4,7 +4,9 @@
 import streamlit.components.v1 as components
 import palooza_wizard as wizard
 
-st.set_page_config(page_title="Palooza Wizard 🧙‍♂️", page_icon="🧙‍♂️", layout="wide")
+st.set_page_config(
+    page_title="Palooza Wizard 🧙‍♂️", page_icon="🧙‍♂️", layout="wide"
+)
 st.title("Palooza Wizard 🧙‍♂️")
 
 st.markdown(
@@ -13,7 +15,9 @@
 st.divider()
 interacted = 0
 
-url = st.text_input("Input an URL", placeholder="https://www.google.com/", value="")
+url = st.text_input(
+    "Input an URL", placeholder="https://www.google.com/", value=""
+)
 valid_url = 1 if validators.url(url) else 0
 if not valid_url and url != "":
     st.error("The URL is not valid", icon="🚨")

diff --git a/streamlit/pages/02_🤔_About.py b/streamlit/pages/02_🤔_About.py
@@ -2,7 +2,9 @@
 import trubrics
 import validators
 
-st.set_page_config(page_title="Palooza Wizard 🧙‍♂️", page_icon="🧙‍♂️", layout="wide")
+st.set_page_config(
+    page_title="Palooza Wizard 🧙‍♂️", page_icon="🧙‍♂️", layout="wide"
+)
 st.title("About Palooza Wizard 🤔")
 
 st.header("🤓 Goal")
@@ -18,4 +20,6 @@
 st.markdown("lorem10")
 
 st.markdown("### ➡️ Next Page: [🚀 ](/)", unsafe_allow_html=False)
-st.markdown("### ➡️ Visit our Website: [🚀 Datapalooza](https://datapalooza.co)")
+st.markdown(
+    "### ➡️ Visit our Website: [🚀 Datapalooza](https://datapalooza.co)"
+)
diff --git a/streamlit/pages/03_🚀_Datapalooza.py b/streamlit/pages/03_🚀_Datapalooza.py
@@ -60,12 +60,18 @@
 
 col1, col2 = st.columns(2)
 with col1:
-    st.markdown("### [🔵 Linkedin](https://www.linkedin.com/company/datapalooza/)")
+    st.markdown(
+        "### [🔵 Linkedin](https://www.linkedin.com/company/datapalooza/)"
+    )
     # st.image('images/octocat.png', width=150)
-    st.write("Get to know our data services and products. Contact us today!")
+    st.write(
+        "Get to know our data services and products. Contact us today!"
+    )
 
 with col2:
-    st.markdown("### [:incoming_envelope: Email](mailto:[email protected])")
+    st.markdown(
+        "### [:incoming_envelope: Email](mailto:[email protected])"
+    )
     # st.image('images/kaggle.png', width=125)
     st.write(
         "Do you have questions or a special inquery? Write us to **[email protected]**"
@@ -74,4 +80,6 @@
 st.markdown(
     "### ➡️ Visit Chain Breaker 🔗 Website: [here](https://chainbreaker.datapalooza.co/)"
 )
-st.markdown("### ➡️ Visit our Website: [🚀 Datapalooza](https://datapalooza.co)")
+st.markdown(
+    "### ➡️ Visit our Website: [🚀 Datapalooza](https://datapalooza.co)"
+)