Skip to content

Commit

Permalink
remove pypdf dependency, update requirements.txt
Browse files: browse the repository at this point in the history
  • Loading branch information
krohling committed Dec 15, 2023
1 parent 18bca90 commit 81858fd
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 29 deletions.
15 changes: 2 additions & 13 deletions bondai/tools/file/file_query.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import pypdf
from pydantic import BaseModel
from typing import Dict
from bondai.tools import Tool
from bondai.models import LLM, EmbeddingModel
from bondai.util import semantic_search, is_html, get_html_text
from bondai.util import semantic_search, is_html, get_html_text, extract_file_text
from bondai.models.openai import OpenAILLM, OpenAIEmbeddingModel, OpenAIModelNames

TOOL_NAME = "file_query"
Expand All @@ -17,16 +16,6 @@ def is_pdf(filename: str) -> bool:
return header == b"%PDF"


def extract_text_from_pdf(file_path: str) -> str:
with open(file_path, "rb") as file:
pdf = pypdf.PdfReader(file)
text = ""
for page_number in range(len(pdf.pages)):
page = pdf.pages[page_number]
text += page.extract_text()
return text


def build_prompt(question: str, context: str) -> str:
return f"""{context}
Expand Down Expand Up @@ -67,7 +56,7 @@ def run(self, arguments: Dict) -> str:
raise Exception("question is required")

if is_pdf(filename):
text = extract_text_from_pdf(filename)
text = extract_file_text(filename)
else:
with open(filename, "r") as f:
text = f.read()
Expand Down
14 changes: 2 additions & 12 deletions bondai/tools/file/file_read.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pypdf
from pydantic import BaseModel
from typing import Dict
from bondai.tools import Tool
from bondai.util import extract_file_text

TOOL_NAME = "file_read"
TOOL_DESCRIPTION = "This tool will return the contents of a file for you to view. Just specify the filename of the file using the 'filename' parameter."
Expand All @@ -13,16 +13,6 @@ def is_pdf(filename: str) -> bool:
return header == b"%PDF"


def extract_text_from_pdf(file_path: str) -> str:
with open(file_path, "rb") as file:
pdf = pypdf.PdfReader(file)
text = ""
for page_number in range(len(pdf.pages)):
page = pdf.pages[page_number]
text += page.extract_text()
return text


class Parameters(BaseModel):
filename: str
thought: str
Expand All @@ -38,7 +28,7 @@ def run(self, arguments: Dict) -> str:
raise Exception("filename is required")

if is_pdf(filename):
return extract_text_from_pdf(filename)
return extract_file_text(filename)
else:
with open(filename, "r") as f:
return f.read()
1 change: 0 additions & 1 deletion bondai/tools/search/duck_duck_go_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
MAX_RESULT_COUNT = 20
DEFAULT_RESULT_COUNT = 5
TOOL_NAME = "duck_duck_go_search"
TOOL_DESCRIPTION = "This tool allows you to search DuckDuckGo. Specify your search string in the 'query' parameter and it will return a list that includes the title and url of matched websites."
TOOL_DESCRIPTION = f"This tool allows you to retrieve a paginated list of search results. You must specify your search string in the 'query' parameter. You can specify the number of search results to return by setting the 'count' parameter. The maximum count is {MAX_RESULT_COUNT} and the default is {DEFAULT_RESULT_COUNT}. To paginate through the full list of all search results just increment the 'page' parameter. By default 'page' is set to 1."


Expand Down
3 changes: 0 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ h2==3.2.0
hpack==3.0.0
httpcore==1.0.2
httplib2==0.22.0
httpx==0.25.1
hyperframe==5.2.0
idna==3.4
importlib-metadata==6.8.0
Expand All @@ -59,7 +58,6 @@ protobuf==4.24.1
psycopg2-binary==2.9.7
pyasn1==0.5.0
pyasn1-modules==0.3.0
pydantic==1.10.12
pydantic_core==2.6.1
pyee==8.2.2
pyparsing==3.1.1
Expand Down Expand Up @@ -92,7 +90,6 @@ urllib3==1.26.17
w3lib==2.1.2
websocket-client==1.6.3
websockets==10.4
Werkzeug==2.3.7
wsproto==1.2.0
yarl==1.9.2
zipp==3.16.2

0 comments on commit 81858fd

Please sign in to comment.