Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
Juanchobanano committed Nov 28, 2023
1 parent 5b0d959 commit 4a60938
Show file tree
Hide file tree
Showing 13 changed files with 122 additions and 37 deletions.
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ repos:
rev: 22.10.0
hooks:
- id: black
language_version: python3.11
args: [--line-length=70]
14 changes: 11 additions & 3 deletions palooza_wizard/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,19 @@ def get_agent_code(file_name: str):

def get_element_metadata(task: dict) -> tuple:
data = task["element"]
tag, attribute, value = data["tag"], data["attribute"], data["value"]
tag, attribute, value = (
data["tag"],
data["attribute"],
data["value"],
)
return tag, attribute, value


def get_agent_function(file_path: str) -> str:
with open(
f"{ct.IMPORTANCE_OUTPUT_FOLDER}/{file_path}", "r", encoding="windows-1252"
f"{ct.IMPORTANCE_OUTPUT_FOLDER}/{file_path}",
"r",
encoding="windows-1252",
) as f:
user_message = f.read()
function_name = file_path
Expand Down Expand Up @@ -65,7 +71,9 @@ def get_agent_functions() -> None:

for file_path in file_paths:
completion = get_agent_function(file_path)
completion = pwc.format_python_completion(completion=completion)
completion = pwc.format_python_completion(
completion=completion
)
save_completion(completion=completion, file_name=file_path)


Expand Down
22 changes: 17 additions & 5 deletions palooza_wizard/algorithms/degree_importance.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
import sys


def filter_candidates_by_containment(graph: nx.DiGraph, candidates: List[int]):
def filter_candidates_by_containment(
graph: nx.DiGraph, candidates: List[int]
):
"""For all g1, g2 in I, neither g1 contains g2 nor g2 contains g1."""
inadmissable_nodes = []
for candidate in candidates:
Expand All @@ -28,12 +30,20 @@ def filter_candidates_by_depth(


def filter_candidates(
graph: nx.DiGraph, root: str, candidates: List[int], min_depth: int = 3
graph: nx.DiGraph,
root: str,
candidates: List[int],
min_depth: int = 3,
):
candidates = [
x for x in filter_candidates_by_depth(graph, root, candidates, min_depth)
x
for x in filter_candidates_by_depth(
graph, root, candidates, min_depth
)
]
candidates = [
x for x in filter_candidates_by_containment(graph, candidates)
]
candidates = [x for x in filter_candidates_by_containment(graph, candidates)]
return candidates


Expand All @@ -60,7 +70,9 @@ def degree_importance(
candidates = [x[0] for x in nodes_degree]

# Filter candidates.
candidates = filter_candidates(graph, root, candidates, min_depth=min_depth)
candidates = filter_candidates(
graph, root, candidates, min_depth=min_depth
)

if verbose:
print("Candidates")
Expand Down
8 changes: 6 additions & 2 deletions palooza_wizard/chatgpt/chatgpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,13 @@ def get_system_message_for_agent() -> str:
return system_message


def get_messages_for_function(user_message: str, function_name: str) -> List[str]:
def get_messages_for_function(
user_message: str, function_name: str
) -> List[str]:
system_message = get_system_message_for_function(function_name)
num_tokens = chatgpt.num_tokens_for_model(system_message + user_message)
num_tokens = chatgpt.num_tokens_for_model(
system_message + user_message
)
print("Number of tokens to be sent: ", num_tokens)
messages = [
{"role": "system", "content": system_message},
Expand Down
12 changes: 9 additions & 3 deletions palooza_wizard/chatgpt/pricing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,27 @@ def validate_model(model: str) -> bool:
return model in ct.PRICING.keys()


def estimated_training_cost(num_tokens: int, model: str = "GPT-3.5-Turbo") -> float:
def estimated_training_cost(
num_tokens: int, model: str = "GPT-3.5-Turbo"
) -> float:
assert validate_model(model), "Invalid model"
num_tokens_per_cost = ct.PRICING[model]["num_tokens"]
training_cost = ct.PRICING[model]["training"]
return (num_tokens / num_tokens_per_cost) * training_cost


def estimated_input_usage_cost(num_tokens: int, model: str = "GPT-3.5-Turbo") -> float:
def estimated_input_usage_cost(
num_tokens: int, model: str = "GPT-3.5-Turbo"
) -> float:
assert validate_model(model), "Invalid model"
num_tokens_per_cost = ct.PRICING[model]["num_tokens"]
input_cost = ct.PRICING[model]["input_usage"]
return (num_tokens / num_tokens_per_cost) * input_cost


def estimated_output_usage_cost(num_tokens: int, model: str = "GPT-3.5-Turbo") -> float:
def estimated_output_usage_cost(
num_tokens: int, model: str = "GPT-3.5-Turbo"
) -> float:
assert validate_model(model), "Invalid model"
num_tokens_per_cost = ct.PRICING[model]["num_tokens"]
output_cost = ct.PRICING[model]["output_usage"]
Expand Down
8 changes: 6 additions & 2 deletions palooza_wizard/chatgpt/tokens.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import tiktoken


def num_tokens_with_encoding(string: str, encoding_name: str = "cl100k_base") -> int:
def num_tokens_with_encoding(
string: str, encoding_name: str = "cl100k_base"
) -> int:
"""This function computes the number of tokens in a string
#https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
"""
Expand All @@ -10,7 +12,9 @@ def num_tokens_with_encoding(string: str, encoding_name: str = "cl100k_base") ->
return num_tokens


def num_tokens_for_model(string: str, model_name: str = "gpt-4") -> int:
def num_tokens_for_model(
string: str, model_name: str = "gpt-4"
) -> int:
"""This function computes the number of tokens in a string for a specific model name
#https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
"""
Expand Down
22 changes: 17 additions & 5 deletions palooza_wizard/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ def get_color(self, soup: BeautifulSoup) -> None:
self.sizes.append(1)

# Get node name
def get_node_name(self, soup: BeautifulSoup, parent_name: str, index: int) -> None:
def get_node_name(
self, soup: BeautifulSoup, parent_name: str, index: int
) -> None:
self.get_color(soup)
node_name = f"{parent_name}__{index}__{str(soup.name)}__*"
return node_name
Expand All @@ -63,7 +65,11 @@ def get_node_properties(

# Add nodes to the graph
def add_nodes(
self, soup: BeautifulSoup, parent_name: str = "", index: int = 1, depth: int = 0
self,
soup: BeautifulSoup,
parent_name: str = "",
index: int = 1,
depth: int = 0,
):

if soup is None:
Expand All @@ -75,7 +81,9 @@ def add_nodes(
node_name = self.get_node_name(soup, parent_name, index)

# Add node to the graph.
properties = self.get_node_properties(soup, parent_name, node_name)
properties = self.get_node_properties(
soup, parent_name, node_name
)
self.G.add_node(node_name, **properties)
self.counter += 1

Expand All @@ -94,9 +102,13 @@ def add_nodes(
# Add children only if there is more than 1 children
# if len(children) > 1:
for i in range(len(children)):
self.add_nodes(children[i], node_name, i + 1, depth + 1)
self.add_nodes(
children[i], node_name, i + 1, depth + 1
)

def get_graph(self, soup: BeautifulSoup, labels_to_integers: bool = True):
def get_graph(
self, soup: BeautifulSoup, labels_to_integers: bool = True
):
self.add_nodes(soup, ct.ROOT_LABEL)
if labels_to_integers:
self.G = nx.convert_node_labels_to_integers(self.G)
Expand Down
8 changes: 6 additions & 2 deletions palooza_wizard/utils/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
from typing import List


def get_files_in_folder(folder_path: str, full_path: bool = True) -> List[str]:
def get_files_in_folder(
folder_path: str, full_path: bool = True
) -> List[str]:
if not file_exists(folder_path):
raise Exception("Folder not found")
files = os.listdir(folder_path)
Expand Down Expand Up @@ -68,7 +70,9 @@ def get_html_from_url(url: str, use_proxies: bool = True) -> str:
return data.content


def get_soup_from_url(url: str, use_proxies: bool = True) -> BeautifulSoup:
def get_soup_from_url(
url: str, use_proxies: bool = True
) -> BeautifulSoup:
"""Download HTML file and return a BeautifulSoup object using get_html_from_url function"""
soup = get_html_from_url(url, use_proxies=use_proxies)
soup = BeautifulSoup(soup, "html.parser")
Expand Down
27 changes: 21 additions & 6 deletions palooza_wizard_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
try:
from palooza_wizard import DatapaloozaWizard
except ImportError as e:
msg = '"{}"\nPlease install `palooza_wizard` to resolve this error.'
msg = (
'"{}"\nPlease install `palooza_wizard` to resolve this error.'
)
raise ImportError(msg.format(str(e)))


Expand All @@ -16,24 +18,34 @@ def main():
help="search engine(s) - " + ", ".join(search_engines_dict),
default="google",
)
ap.add_argument("-o", help="output file [html, csv, json]", default="print")
ap.add_argument(
"-n", help="filename for output file", default=config.OUTPUT_DIR + "output"
"-o", help="output file [html, csv, json]", default="print"
)
ap.add_argument(
"-n",
help="filename for output file",
default=config.OUTPUT_DIR + "output",
)
ap.add_argument(
"-p",
help="number of pages",
default=config.SEARCH_ENGINE_RESULTS_PAGES,
type=int,
)
ap.add_argument("-f", help="filter results [url, title, text, host]", default=None)
ap.add_argument(
"-f",
help="filter results [url, title, text, host]",
default=None,
)
ap.add_argument(
"-i",
help="ignore duplicats, useful when multiple search engines are used",
action="store_true",
)
ap.add_argument(
"-proxy", help="use proxy (protocol://ip:port)", default=config.PROXY
"-proxy",
help="use proxy (protocol://ip:port)",
default=config.PROXY,
)

args = ap.parse_args()
Expand All @@ -47,7 +59,10 @@ def main():
]

if not engines:
print("Please choose a search engine: " + ", ".join(search_engines_dict))
print(
"Please choose a search engine: "
+ ", ".join(search_engines_dict)
)
else:
if "all" in engines:
engine = AllSearchEngines(proxy, timeout)
Expand Down
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

here = os.path.abspath(os.path.dirname(__file__))

with codecs.open(os.path.join(here, "README.md"), encoding="utf-8") as fh:
with codecs.open(
os.path.join(here, "README.md"), encoding="utf-8"
) as fh:
long_description = "\\n" + fh.read()

requirements = []
Expand Down
8 changes: 6 additions & 2 deletions streamlit/01_πŸ§™β€β™‚οΈ_Palooza_Wizard.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
import streamlit.components.v1 as components
import palooza_wizard as wizard

st.set_page_config(page_title="Palooza Wizard πŸ§™β€β™‚οΈ", page_icon="πŸ§™β€β™‚οΈ", layout="wide")
st.set_page_config(
page_title="Palooza Wizard πŸ§™β€β™‚οΈ", page_icon="πŸ§™β€β™‚οΈ", layout="wide"
)
st.title("Palooza Wizard πŸ§™β€β™‚οΈ")

st.markdown(
Expand All @@ -13,7 +15,9 @@
st.divider()
interacted = 0

url = st.text_input("Input an URL", placeholder="https://www.google.com/", value="")
url = st.text_input(
"Input an URL", placeholder="https://www.google.com/", value=""
)
valid_url = 1 if validators.url(url) else 0
if not valid_url and url != "":
st.error("The URL is not valid", icon="🚨")
Expand Down
8 changes: 6 additions & 2 deletions streamlit/pages/02_πŸ€”_About.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import trubrics
import validators

st.set_page_config(page_title="Palooza Wizard πŸ§™β€β™‚οΈ", page_icon="πŸ§™β€β™‚οΈ", layout="wide")
st.set_page_config(
page_title="Palooza Wizard πŸ§™β€β™‚οΈ", page_icon="πŸ§™β€β™‚οΈ", layout="wide"
)
st.title("About Palooza Wizard πŸ€”")

st.header("πŸ€“ Goal")
Expand All @@ -18,4 +20,6 @@
st.markdown("lorem10")

st.markdown("### ➑️ Next Page: [πŸš€ ](/)", unsafe_allow_html=False)
st.markdown("### ➑️ Visit our Website: [πŸš€ Datapalooza](https://datapalooza.co)")
st.markdown(
"### ➑️ Visit our Website: [πŸš€ Datapalooza](https://datapalooza.co)"
)
16 changes: 12 additions & 4 deletions streamlit/pages/03_πŸš€_Datapalooza.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,18 @@

col1, col2 = st.columns(2)
with col1:
st.markdown("### [πŸ”΅ Linkedin](https://www.linkedin.com/company/datapalooza/)")
st.markdown(
"### [πŸ”΅ Linkedin](https://www.linkedin.com/company/datapalooza/)"
)
# st.image('images/octocat.png', width=150)
st.write("Get to know our data services and products. Contact us today!")
st.write(
"Get to know our data services and products. Contact us today!"
)

with col2:
st.markdown("### [:incoming_envelope: Email](mailto:[email protected])")
st.markdown(
"### [:incoming_envelope: Email](mailto:[email protected])"
)
# st.image('images/kaggle.png', width=125)
st.write(
"Do you have questions or a special inquery? Write us to **[email protected]**"
Expand All @@ -74,4 +80,6 @@
st.markdown(
"### ➑️ Visit Chain Breaker πŸ”— Website: [here](https://chainbreaker.datapalooza.co/)"
)
st.markdown("### ➑️ Visit our Website: [πŸš€ Datapalooza](https://datapalooza.co)")
st.markdown(
"### ➑️ Visit our Website: [πŸš€ Datapalooza](https://datapalooza.co)"
)

0 comments on commit 4a60938

Please sign in to comment.