From 4a60938ade631dcb4bbaa04455b25141934f3706 Mon Sep 17 00:00:00 2001 From: Juanchobanano Date: Tue, 28 Nov 2023 16:25:06 +0200 Subject: [PATCH] fix --- .pre-commit-config.yaml | 2 ++ palooza_wizard/agent.py | 14 +++++++--- .../algorithms/degree_importance.py | 22 +++++++++++---- palooza_wizard/chatgpt/chatgpt.py | 8 ++++-- palooza_wizard/chatgpt/pricing.py | 12 ++++++--- palooza_wizard/chatgpt/tokens.py | 8 ++++-- palooza_wizard/graph.py | 22 +++++++++++---- palooza_wizard/utils/files.py | 8 ++++-- palooza_wizard_cli.py | 27 ++++++++++++++----- setup.py | 4 ++- ...342\231\202\357\270\217_Palooza_Wizard.py" | 8 ++++-- .../pages/02_\360\237\244\224_About.py" | 8 ++++-- .../pages/03_\360\237\232\200_Datapalooza.py" | 16 ++++++++--- 13 files changed, 122 insertions(+), 37 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 48ee030..464c29c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,3 +9,5 @@ repos: rev: 22.10.0 hooks: - id: black + language_version: python3.11 + args: [--line-length=70] diff --git a/palooza_wizard/agent.py b/palooza_wizard/agent.py index 280e028..8c60f38 100644 --- a/palooza_wizard/agent.py +++ b/palooza_wizard/agent.py @@ -20,13 +20,19 @@ def get_agent_code(file_name: str): def get_element_metadata(task: dict) -> tuple: data = task["element"] - tag, attribute, value = data["tag"], data["attribute"], data["value"] + tag, attribute, value = ( + data["tag"], + data["attribute"], + data["value"], + ) return tag, attribute, value def get_agent_function(file_path: str) -> str: with open( - f"{ct.IMPORTANCE_OUTPUT_FOLDER}/{file_path}", "r", encoding="windows-1252" + f"{ct.IMPORTANCE_OUTPUT_FOLDER}/{file_path}", + "r", + encoding="windows-1252", ) as f: user_message = f.read() function_name = file_path @@ -65,7 +71,9 @@ def get_agent_functions() -> None: for file_path in file_paths: completion = get_agent_function(file_path) - completion = pwc.format_python_completion(completion=completion) + completion = pwc.format_python_completion( + completion=completion + ) save_completion(completion=completion, file_name=file_path) diff --git a/palooza_wizard/algorithms/degree_importance.py b/palooza_wizard/algorithms/degree_importance.py index 92f4f11..c491393 100644 --- a/palooza_wizard/algorithms/degree_importance.py +++ b/palooza_wizard/algorithms/degree_importance.py @@ -7,7 +7,9 @@ import sys -def filter_candidates_by_containment(graph: nx.DiGraph, candidates: List[int]): +def filter_candidates_by_containment( + graph: nx.DiGraph, candidates: List[int] +): """Para todo g1, g2 e I, g1 no contiene a g2 ni g2 a g1""" inadmissable_nodes = [] for candidate in candidates: @@ -28,12 +30,20 @@ def filter_candidates_by_depth( def filter_candidates( - graph: nx.DiGraph, root: str, candidates: List[int], min_depth: int = 3 + graph: nx.DiGraph, + root: str, + candidates: List[int], + min_depth: int = 3, ): candidates = [ - x for x in filter_candidates_by_depth(graph, root, candidates, min_depth) + x + for x in filter_candidates_by_depth( + graph, root, candidates, min_depth + ) + ] + candidates = [ + x for x in filter_candidates_by_containment(graph, candidates) ] - candidates = [x for x in filter_candidates_by_containment(graph, candidates)] return candidates @@ -60,7 +70,9 @@ def degree_importance( candidates = [x[0] for x in nodes_degree] # Filter candidates. - candidates = filter_candidates(graph, root, candidates, min_depth=min_depth) + candidates = filter_candidates( + graph, root, candidates, min_depth=min_depth + ) if verbose: print("Candidates") diff --git a/palooza_wizard/chatgpt/chatgpt.py b/palooza_wizard/chatgpt/chatgpt.py index 5e0d7fa..96fdc2a 100644 --- a/palooza_wizard/chatgpt/chatgpt.py +++ b/palooza_wizard/chatgpt/chatgpt.py @@ -40,9 +40,13 @@ def get_system_message_for_agent() -> str: return system_message -def get_messages_for_function(user_message: str, function_name: str) -> List[str]: +def get_messages_for_function( + user_message: str, function_name: str +) -> List[str]: system_message = get_system_message_for_function(function_name) - num_tokens = chatgpt.num_tokens_for_model(system_message + user_message) + num_tokens = chatgpt.num_tokens_for_model( + system_message + user_message + ) print("Number of tokens to be sent: ", num_tokens) messages = [ {"role": "system", "content": system_message}, diff --git a/palooza_wizard/chatgpt/pricing.py b/palooza_wizard/chatgpt/pricing.py index 6a8b874..1257d8e 100644 --- a/palooza_wizard/chatgpt/pricing.py +++ b/palooza_wizard/chatgpt/pricing.py @@ -10,21 +10,27 @@ def validate_model(model: str) -> bool: return model in ct.PRICING.keys() -def estimated_training_cost(num_tokens: int, model: str = "GPT-3.5-Turbo") -> float: +def estimated_training_cost( + num_tokens: int, model: str = "GPT-3.5-Turbo" +) -> float: assert validate_model(model), "Invalid model" num_tokens_per_cost = ct.PRICING[model]["num_tokens"] training_cost = ct.PRICING[model]["training"] return (num_tokens / num_tokens_per_cost) * training_cost -def estimated_input_usage_cost(num_tokens: int, model: str = "GPT-3.5-Turbo") -> float: +def estimated_input_usage_cost( + num_tokens: int, model: str = "GPT-3.5-Turbo" +) -> float: assert validate_model(model), "Invalid model" num_tokens_per_cost = ct.PRICING[model]["num_tokens"] input_cost = ct.PRICING[model]["input_usage"] return (num_tokens / num_tokens_per_cost) * input_cost -def estimated_output_usage_cost(num_tokens: int, model: str = "GPT-3.5-Turbo") -> float: +def estimated_output_usage_cost( + num_tokens: int, model: str = "GPT-3.5-Turbo" +) -> float: assert validate_model(model), "Invalid model" num_tokens_per_cost = ct.PRICING[model]["num_tokens"] output_cost = ct.PRICING[model]["output_usage"] diff --git a/palooza_wizard/chatgpt/tokens.py b/palooza_wizard/chatgpt/tokens.py index f958296..639b9e3 100644 --- a/palooza_wizard/chatgpt/tokens.py +++ b/palooza_wizard/chatgpt/tokens.py @@ -1,7 +1,9 @@ import tiktoken -def num_tokens_with_encoding(string: str, encoding_name: str = "cl100k_base") -> int: +def num_tokens_with_encoding( + string: str, encoding_name: str = "cl100k_base" +) -> int: """This function computes the number of tokens in a string #https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb """ @@ -10,7 +12,9 @@ def num_tokens_with_encoding(string: str, encoding_name: str = "cl100k_base") -> return num_tokens -def num_tokens_for_model(string: str, model_name: str = "gpt-4") -> int: +def num_tokens_for_model( + string: str, model_name: str = "gpt-4" +) -> int: """This function computers the number of token in a string for a specific model name #https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb """ diff --git a/palooza_wizard/graph.py b/palooza_wizard/graph.py index 565fb9d..bb45b56 100644 --- a/palooza_wizard/graph.py +++ b/palooza_wizard/graph.py @@ -37,7 +37,9 @@ def get_color(self, soup: BeautifulSoup) -> None: self.sizes.append(1) # Get node name - def get_node_name(self, soup: BeautifulSoup, parent_name: str, index: int) -> None: + def get_node_name( + self, soup: BeautifulSoup, parent_name: str, index: int + ) -> None: self.get_color(soup) node_name = f"{parent_name}__{index}__{str(soup.name)}__*" return node_name @@ -63,7 +65,11 @@ def get_node_properties( # Add nodes to the graph def add_nodes( - self, soup: BeautifulSoup, parent_name: str = "", index: int = 1, depth: int = 0 + self, + soup: BeautifulSoup, + parent_name: str = "", + index: int = 1, + depth: int = 0, ): if soup is None: @@ -75,7 +81,9 @@ def add_nodes( node_name = self.get_node_name(soup, parent_name, index) # Add node to the graph. - properties = self.get_node_properties(soup, parent_name, node_name) + properties = self.get_node_properties( + soup, parent_name, node_name + ) self.G.add_node(node_name, **properties) self.counter += 1 @@ -94,9 +102,13 @@ def add_nodes( # Add children only if there is more than 1 children # if len(children) > 1: for i in range(len(children)): - self.add_nodes(children[i], node_name, i + 1, depth + 1) + self.add_nodes( + children[i], node_name, i + 1, depth + 1 + ) - def get_graph(self, soup: BeautifulSoup, labels_to_integers: bool = True): + def get_graph( + self, soup: BeautifulSoup, labels_to_integers: bool = True + ): self.add_nodes(soup, ct.ROOT_LABEL) if labels_to_integers: self.G = nx.convert_node_labels_to_integers(self.G) diff --git a/palooza_wizard/utils/files.py b/palooza_wizard/utils/files.py index 8f85425..d6afc70 100644 --- a/palooza_wizard/utils/files.py +++ b/palooza_wizard/utils/files.py @@ -7,7 +7,9 @@ from typing import List -def get_files_in_folder(folder_path: str, full_path: bool = True) -> List[str]: +def get_files_in_folder( + folder_path: str, full_path: bool = True +) -> List[str]: if not file_exists(folder_path): raise Exception("Folder not found") files = os.listdir(folder_path) @@ -68,7 +70,9 @@ def get_html_from_url(url: str, use_proxies: bool = True) -> str: return data.content -def get_soup_from_url(url: str, use_proxies: bool = True) -> BeautifulSoup: +def get_soup_from_url( + url: str, use_proxies: bool = True +) -> BeautifulSoup: """Download HTML file and return a BeautifulSoup object using get_html_from_url function""" soup = get_html_from_url(url, use_proxies=use_proxies) soup = BeautifulSoup(soup, "html.parser") diff --git a/palooza_wizard_cli.py b/palooza_wizard_cli.py index 034cfe3..88c17f4 100644 --- a/palooza_wizard_cli.py +++ b/palooza_wizard_cli.py @@ -4,7 +4,9 @@ try: from palooza_wizard import DatapaloozaWizard except ImportError as e: - msg = '"{}"\nPlease install `palooza_wizard` to resolve this error.' + msg = ( + '"{}"\nPlease install `palooza_wizard` to resolve this error.' + ) raise ImportError(msg.format(str(e))) @@ -16,9 +18,13 @@ def main(): help="search engine(s) - " + ", ".join(search_engines_dict), default="google", ) - ap.add_argument("-o", help="output file [html, csv, json]", default="print") ap.add_argument( - "-n", help="filename for output file", default=config.OUTPUT_DIR + "output" + "-o", help="output file [html, csv, json]", default="print" + ) + ap.add_argument( + "-n", + help="filename for output file", + default=config.OUTPUT_DIR + "output", ) ap.add_argument( "-p", @@ -26,14 +32,20 @@ def main(): default=config.SEARCH_ENGINE_RESULTS_PAGES, type=int, ) - ap.add_argument("-f", help="filter results [url, title, text, host]", default=None) + ap.add_argument( + "-f", + help="filter results [url, title, text, host]", + default=None, + ) ap.add_argument( "-i", help="ignore duplicats, useful when multiple search engines are used", action="store_true", ) ap.add_argument( - "-proxy", help="use proxy (protocol://ip:port)", default=config.PROXY + "-proxy", + help="use proxy (protocol://ip:port)", + default=config.PROXY, ) args = ap.parse_args() @@ -47,7 +59,10 @@ def main(): ] if not engines: - print("Please choose a search engine: " + ", ".join(search_engines_dict)) + print( + "Please choose a search engine: " + + ", ".join(search_engines_dict) + ) else: if "all" in engines: engine = AllSearchEngines(proxy, timeout) diff --git a/setup.py b/setup.py index fed7c65..fd1f90b 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,9 @@ here = os.path.abspath(os.path.dirname(__file__)) -with codecs.open(os.path.join(here, "README.md"), encoding="utf-8") as fh: +with codecs.open( + os.path.join(here, "README.md"), encoding="utf-8" +) as fh: long_description = "\\n" + fh.read() requirements = [] diff --git "a/streamlit/01_\360\237\247\231\342\200\215\342\231\202\357\270\217_Palooza_Wizard.py" "b/streamlit/01_\360\237\247\231\342\200\215\342\231\202\357\270\217_Palooza_Wizard.py" index 3f6664a..31050e6 100644 --- "a/streamlit/01_\360\237\247\231\342\200\215\342\231\202\357\270\217_Palooza_Wizard.py" +++ "b/streamlit/01_\360\237\247\231\342\200\215\342\231\202\357\270\217_Palooza_Wizard.py" @@ -4,7 +4,9 @@ import streamlit.components.v1 as components import palooza_wizard as wizard -st.set_page_config(page_title="Palooza Wizard 🧙‍♂️", page_icon="🧙‍♂️", layout="wide") +st.set_page_config( + page_title="Palooza Wizard 🧙‍♂️", page_icon="🧙‍♂️", layout="wide" +) st.title("Palooza Wizard 🧙‍♂️") st.markdown( @@ -13,7 +15,9 @@ st.divider() interacted = 0 -url = st.text_input("Input an URL", placeholder="https://www.google.com/", value="") +url = st.text_input( + "Input an URL", placeholder="https://www.google.com/", value="" +) valid_url = 1 if validators.url(url) else 0 if not valid_url and url != "": st.error("The URL is not valid", icon="🚨") diff --git "a/streamlit/pages/02_\360\237\244\224_About.py" "b/streamlit/pages/02_\360\237\244\224_About.py" index 5c832a8..d682ed4 100644 --- "a/streamlit/pages/02_\360\237\244\224_About.py" +++ "b/streamlit/pages/02_\360\237\244\224_About.py" @@ -2,7 +2,9 @@ import trubrics import validators -st.set_page_config(page_title="Palooza Wizard 🧙‍♂️", page_icon="🧙‍♂️", layout="wide") +st.set_page_config( + page_title="Palooza Wizard 🧙‍♂️", page_icon="🧙‍♂️", layout="wide" +) st.title("About Palooza Wizard 🤔") st.header("🤓 Goal") @@ -18,4 +20,6 @@ st.markdown("lorem10") st.markdown("### ➡️ Next Page: [🚀 ](/)", unsafe_allow_html=False) -st.markdown("### ➡️ Visit our Website: [🚀 Datapalooza](https://datapalooza.co)") +st.markdown( + "### ➡️ Visit our Website: [🚀 Datapalooza](https://datapalooza.co)" +) diff --git "a/streamlit/pages/03_\360\237\232\200_Datapalooza.py" "b/streamlit/pages/03_\360\237\232\200_Datapalooza.py" index 98542a5..3d4cddf 100644 --- "a/streamlit/pages/03_\360\237\232\200_Datapalooza.py" +++ "b/streamlit/pages/03_\360\237\232\200_Datapalooza.py" @@ -60,12 +60,18 @@ col1, col2 = st.columns(2) with col1: - st.markdown("### [🔵 Linkedin](https://www.linkedin.com/company/datapalooza/)") + st.markdown( + "### [🔵 Linkedin](https://www.linkedin.com/company/datapalooza/)" + ) # st.image('images/octocat.png', width=150) - st.write("Get to know our data services and products. Contact us today!") + st.write( + "Get to know our data services and products. Contact us today!" + ) with col2: - st.markdown("### [:incoming_envelope: Email](mailto:info@datapalooza.co)") + st.markdown( + "### [:incoming_envelope: Email](mailto:info@datapalooza.co)" + ) # st.image('images/kaggle.png', width=125) st.write( "Do you have questions or a special inquery? Write us to **info@datapalooza.co**" @@ -74,4 +80,6 @@ st.markdown( "### ➡️ Visit Chain Breaker 🔗 Website: [here](https://chainbreaker.datapalooza.co/)" ) -st.markdown("### ➡️ Visit our Website: [🚀 Datapalooza](https://datapalooza.co)") +st.markdown( + "### ➡️ Visit our Website: [🚀 Datapalooza](https://datapalooza.co)" +)