Skip to content

Commit

Permalink
Add super awersome feature
Browse files Browse the repository at this point in the history
  • Loading branch information
Juanchobanano committed Nov 28, 2023
1 parent 41b6c6f commit 5b0d959
Show file tree
Hide file tree
Showing 25 changed files with 300 additions and 206 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -159,4 +159,4 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
playground/
outputs/
outputs/
11 changes: 11 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/psf/black
rev: 22.10.0
hooks:
- id: black
2 changes: 1 addition & 1 deletion LICENSE.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2 changes: 1 addition & 1 deletion palooza_wizard/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
import palooza_wizard.constants as constants

for folder in constants.FOLDERS:
files.create_folder_if_not_exists(folder)
files.create_folder_if_not_exists(folder)
38 changes: 24 additions & 14 deletions palooza_wizard/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
import palooza_wizard.chatgpt as pwc
import palooza_wizard.constants as ct
from typing import List
import os
import os


def get_agent_code(file_name: str):
python_code = load_code_string(file_name = file_name)
python_code = load_code_string(file_name=file_name)
system_message = pwc.get_system_message_for_agent()
messages = [
{"role": "system", "content": system_message},
Expand All @@ -16,25 +17,33 @@ def get_agent_code(file_name: str):
with open(f"./{file_name}", "w") as f:
f.write(agent_code)


def get_element_metadata(task: dict) -> tuple:
data = task["element"]
tag, attribute, value = data["tag"], data["attribute"], data["value"]
return tag, attribute, value


def get_agent_function(file_path: str) -> str:
with open(f'{ct.IMPORTANCE_OUTPUT_FOLDER}/{file_path}', "r", encoding='windows-1252') as f:
with open(
f"{ct.IMPORTANCE_OUTPUT_FOLDER}/{file_path}", "r", encoding="windows-1252"
) as f:
user_message = f.read()
function_name = file_path
messages = pwc.get_messages_for_function(user_message = user_message, function_name = function_name)
completion = pwc.get_completion_from_messages(messages = messages)
messages = pwc.get_messages_for_function(
user_message=user_message, function_name=function_name
)
completion = pwc.get_completion_from_messages(messages=messages)
return completion


def save_completion(completion: str, file_name: str) -> None:
file_name = file_name.replace(".html", ".py")
with open(f"{ct.AGENT_OUTPUT_FOLDER}/{file_name}", "a") as f:
f.write(completion)
f.write("\n\n")


def get_agent_functions() -> None:
"""Get agent functions
Expand All @@ -45,23 +54,24 @@ def get_agent_functions() -> None:
Return:
- None
"""
#try:
# try:
# #os.remove(f"{ct.FUNCTIONS_OUTPUT_FOLDER}/{file_name}")
# os.remove(file_name)
#except:
# except:
# pass

file_paths = os.listdir(ct.IMPORTANCE_OUTPUT_FOLDER)
#file_paths = [file_paths[index] for index in indexes]
# file_paths = [file_paths[index] for index in indexes]

for file_path in file_paths:
completion = get_agent_function(file_path)
completion = pwc.format_python_completion(completion = completion)
save_completion(completion = completion, file_name = file_path)
completion = pwc.format_python_completion(completion=completion)
save_completion(completion=completion, file_name=file_path)


def load_code_string(file_name: str):
#{ct.FUNCTIONS_OUTPUT_FOLDER}
# {ct.FUNCTIONS_OUTPUT_FOLDER}
with open(f"./{file_name}", "r") as f:
python_code = f.read()
print(python_code)
python_code = f.read()
print(python_code)
return python_code
2 changes: 1 addition & 1 deletion palooza_wizard/algorithms/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .degree_importance import degree_importance
from .degree_importance import degree_importance
72 changes: 34 additions & 38 deletions palooza_wizard/algorithms/degree_importance.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,71 +2,67 @@
from typing import List
import palooza_wizard.constants as ct
from bs4 import BeautifulSoup
import os
import joblib
import sys

def filter_candidates_by_containment(
graph: nx.DiGraph,
candidates: List[int]
):
"""Para todo g1, g2 e I, g1 no contiene a g2 ni g2 a g1
"""
import os
import joblib
import sys


def filter_candidates_by_containment(graph: nx.DiGraph, candidates: List[int]):
"""Para todo g1, g2 e I, g1 no contiene a g2 ni g2 a g1"""
inadmissable_nodes = []
for candidate in candidates:
if candidate in inadmissable_nodes:
continue
continue
descendants = list(nx.descendants(graph, candidate))
inadmissable_nodes = inadmissable_nodes + descendants
yield candidate


def filter_candidates_by_depth(
graph: nx.DiGraph,
root,
candidates: List[int],
min_depth: int
):
nodes_depth = nx.shortest_path_length(graph, root)
for candidate in candidates:
graph: nx.DiGraph, root, candidates: List[int], min_depth: int
):
nodes_depth = nx.shortest_path_length(graph, root)
for candidate in candidates:
if nodes_depth[candidate] > min_depth:
yield candidate
yield candidate


def filter_candidates(
graph: nx.DiGraph,
root: str,
candidates: List[int],
min_depth: int = 3
):
candidates = [x for x in filter_candidates_by_depth(graph, root, candidates, min_depth)]
graph: nx.DiGraph, root: str, candidates: List[int], min_depth: int = 3
):
candidates = [
x for x in filter_candidates_by_depth(graph, root, candidates, min_depth)
]
candidates = [x for x in filter_candidates_by_containment(graph, candidates)]
return candidates
return candidates


def degree_importance(
graph: nx.DiGraph,
root: str,
min_depth: int,
max_candidates: int,
verbose: bool = False
) -> List[int]:

# Get degree of nodes.
graph: nx.DiGraph,
root: str,
min_depth: int,
max_candidates: int,
verbose: bool = False,
) -> List[int]:

# Get degree of nodes.
nodes_degree = list(graph.degree(graph.nodes()))

# Sort nodes by degree.
nodes_degree.sort(key = lambda z: z[1], reverse=True)
nodes_degree.sort(key=lambda z: z[1], reverse=True)

nodes_degree = nodes_degree[:max_candidates]

#for key, value in nodes_degree:
# for key, value in nodes_degree:
# print(f"{key}: {value}")

# Get candidates of nodes.
candidates = [x[0] for x in nodes_degree]

# Filter candidates.
candidates = filter_candidates(graph, root, candidates, min_depth = min_depth)
candidates = filter_candidates(graph, root, candidates, min_depth=min_depth)

if verbose:
if verbose:
print("Candidates")
print(candidates)

Expand Down
2 changes: 1 addition & 1 deletion palooza_wizard/chatgpt/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .pricing import *
from .tokens import *
from .chatgpt import *
from .chatgpt import *
31 changes: 20 additions & 11 deletions palooza_wizard/chatgpt/chatgpt.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import openai
from typing import List
from typing import List
from dotenv import dotenv_values
import palooza_wizard.constants as ct
import palooza_wizard.chatgpt as chatgpt
Expand All @@ -8,34 +8,40 @@

openai.api_key = ct.OPEN_AI_API_KEY

def get_completion_from_messages(messages, model="gpt-4", temperature=0, max_tokens=500): # gpt-4

def get_completion_from_messages(
messages, model="gpt-4", temperature=0, max_tokens=500
): # gpt-4
response = openai.ChatCompletion.create(
model=model,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
temperature=temperature,
max_tokens=max_tokens,
)
return response.choices[0].message["content"]


def get_system_message_for_function(function_name: str) -> str:
system_message = f"""
Create a Python function with a proper name starting with '{function_name}' that extracts all relevant data from
the provided HTML code by the user and returns the data as a dictionary. The keys of the dictionaries should be as
few as possible. Utilize Beautiful Soup (beautifulsoup4) to implement this Python function. The output should
Create a Python function with a proper name starting with '{function_name}' that extracts all relevant data from
the provided HTML code by the user and returns the data as a dictionary. The keys of the dictionaries should be as
few as possible. Utilize Beautiful Soup (beautifulsoup4) to implement this Python function. The output should
consist solely of Python code without any English words.
"""
return system_message


def get_system_message_for_agent() -> str:
system_message = f"""
Create a python class called 'agent' which implements as methods the defined functions passed by the user in the prompt.
Create a python class called 'agent' which implements as methods the defined functions passed by the user in the prompt.
In addition, include a new method called 'extract_data' that accepts as argument a beautifulsoup object and a url and which uses all the methods of the class and returns a dictionary with the result
of each method. Constructor method must not recieve any parameter, instead, each method within the class must accept as parameter a BeautifulSoup object
"""
return system_message


def get_messages_for_function(user_message: str, function_name: str) -> List[str]:
system_message = get_system_message_for_function(function_name)
system_message = get_system_message_for_function(function_name)
num_tokens = chatgpt.num_tokens_for_model(system_message + user_message)
print("Number of tokens to be sent: ", num_tokens)
messages = [
Expand All @@ -44,8 +50,11 @@ def get_messages_for_function(user_message: str, function_name: str) -> List[str
]
return messages


def format_python_completion(completion: str) -> str:
if completion.find("```python") != -1:
a, b = completion.find("```python"), completion.find("```", len("```python") + 1, len(completion))
return completion[a + len("```python"): b]
a, b = completion.find("```python"), completion.find(
"```", len("```python") + 1, len(completion)
)
return completion[a + len("```python") : b]
return completion
12 changes: 6 additions & 6 deletions palooza_wizard/chatgpt/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,18 @@
"training": 0.0004,
"input_usage": 0.0016,
"output_usage": 0.0016,
"num_tokens": 1000
},
"num_tokens": 1000,
},
"davinci-002": {
"training": 0.0060,
"input_usage": 0.0120,
"output_usage": 0.0120,
"num_tokens": 1000
"num_tokens": 1000,
},
"GPT-3.5-Turbo": {
"training": 0.0080,
"input_usage": 0.0120,
"output_usage": 0.0160,
"num_tokens": 1000
}
}
"num_tokens": 1000,
},
}
10 changes: 7 additions & 3 deletions palooza_wizard/chatgpt/pricing.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,31 @@
import palooza_wizard.chatgpt.constants as ct
from typing import List
from typing import List


def get_available_models() -> List[str]:
return list(ct.PRICING.keys())


def validate_model(model: str) -> bool:
return model in ct.PRICING.keys()


def estimated_training_cost(num_tokens: int, model: str = "GPT-3.5-Turbo") -> float:
assert validate_model(model), "Invalid model"
num_tokens_per_cost = ct.PRICING[model]["num_tokens"]
training_cost = ct.PRICING[model]["training"]
return (num_tokens / num_tokens_per_cost) * training_cost


def estimated_input_usage_cost(num_tokens: int, model: str = "GPT-3.5-Turbo") -> float:
assert validate_model(model), "Invalid model"
num_tokens_per_cost = ct.PRICING[model]["num_tokens"]
input_cost = ct.PRICING[model]["input_usage"]
return (num_tokens / num_tokens_per_cost) * input_cost

def estimated_output_usage_cost(num_tokens: int, model: str = "GPT-3.5-Turbo") -> float:

def estimated_output_usage_cost(num_tokens: int, model: str = "GPT-3.5-Turbo") -> float:
assert validate_model(model), "Invalid model"
num_tokens_per_cost = ct.PRICING[model]["num_tokens"]
output_cost = ct.PRICING[model]["output_usage"]
return (num_tokens / num_tokens_per_cost) * output_cost

4 changes: 3 additions & 1 deletion palooza_wizard/chatgpt/tokens.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import tiktoken


def num_tokens_with_encoding(string: str, encoding_name: str = "cl100k_base") -> int:
"""This function computes the number of tokens in a string
#https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
Expand All @@ -8,10 +9,11 @@ def num_tokens_with_encoding(string: str, encoding_name: str = "cl100k_base") ->
num_tokens = len(encoding.encode(string))
return num_tokens


def num_tokens_for_model(string: str, model_name: str = "gpt-4") -> int:
"""This function computes the number of tokens in a string for a specific model name
#https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
"""
encoding = tiktoken.encoding_for_model(model_name)
num_tokens = len(encoding.encode(string))
return num_tokens
return num_tokens
Loading

0 comments on commit 5b0d959

Please sign in to comment.