Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

Convert Metaphor documentation to Exa #896

Merged
merged 7 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions llama_hub/tools/exa/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Exa (formerly Metaphor) Tool

This tool connects to [Exa](https://exa.ai/) to easily enable
your agent to search and get HTML content from the Internet.

To begin, you need to obtain an API key on the [Exa developer dashboard](https://dashboard.exa.ai).

## Usage

This tool has a more extensive example usage documented in a Jupyter notebook [here](https://github.com/emptycrown/llama-hub/tree/main/llama_hub/tools/notebooks/exa.ipynb).

Here's an example usage of the ExaToolSpec.

```python
from llama_hub.tools.exa import ExaToolSpec
from llama_index.agent import OpenAIAgent

exa_tool = ExaToolSpec(
api_key='your-key',
)
agent = OpenAIAgent.from_tools(exa_tool.to_tool_list())

agent.chat('Can you summarize the news published in the last month on superconductors')
```

- `search`: Search for a list of articles relating to a natural language query.
- `retrieve_documents`: Retrieve a list of documents returned from `search`.
- `search_and_retrieve_documents`: Combines `search` and `retrieve_documents` to directly return a list of documents related to a search.
- `search_and_retrieve_highlights`: Searches and returns highlights (relevant snippets) from the matching documents.
- `find_similar`: Find similar documents to a given URL.
- `current_date`: Utility for the Agent to get today's date.

This loader is designed to be used as a way to load data as a Tool in an Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples.
6 changes: 6 additions & 0 deletions llama_hub/tools/exa/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Package entry point: re-export the Exa tool spec as the public API.
from llama_hub.tools.exa.base import ExaToolSpec

__all__ = ["ExaToolSpec"]
183 changes: 183 additions & 0 deletions llama_hub/tools/exa/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
"""Exa (formerly Metaphor) tool spec."""

import datetime
from typing import List, Optional

from llama_index.readers.schema.base import Document
from llama_index.tools.tool_spec.base import BaseToolSpec


class ExaToolSpec(BaseToolSpec):
    """Exa tool spec."""

    # Names of the methods on this class that are exposed as tools.
    # NOTE(review): the method docstrings below are presumably surfaced to the
    # LLM as tool descriptions by BaseToolSpec -- keep them short and accurate,
    # and put maintainer-only notes in `#` comments instead.
    spec_functions = [
        "search",
        "retrieve_documents",
        "search_and_retrieve_documents",
        "search_and_retrieve_highlights",
        "find_similar",
        "current_date",
    ]

    def __init__(
        self,
        api_key: str,
        verbose: bool = True,
        max_characters: int = 2000,
    ) -> None:
        """Initialize with parameters.

        Args:
            api_key (str): Exa API key.
            verbose (bool): If True, print the autoprompt string returned by
                each search. Defaults to True.
            max_characters (int): Max characters of text requested per result
                in `search_and_retrieve_documents`. Defaults to 2000.
        """
        # Imported here so `exa_py` is only required when the tool is built.
        from exa_py import Exa

        self.client = Exa(api_key=api_key, user_agent="llama-index")
        self._verbose = verbose
        # max characters for the text field in the search_and_contents function
        self._max_characters = max_characters

    def _log_autoprompt(self, response) -> None:
        """Print the autoprompt string of a search response when verbose."""
        if self._verbose:
            print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")

    def search(
        self,
        query: str,
        num_results: Optional[int] = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
    ) -> List:
        """
        Exa allows you to use a natural language query to search the internet

        Args:
            query (str): A natural language query phrased as an answer for what the link provides, ie: "This is the latest news about space:"
            num_results (Optional[int]): Number of results to return. Defaults to 10.
            include_domains (Optional[List(str)]): A list of top level domains like ["wsj.com"] to limit the search to specific sites.
            exclude_domains (Optional[List(str)]): Top level domains to exclude.
            start_published_date (Optional[str]): A date string like "2020-06-15". Get the date from `current_date`
            end_published_date (Optional[str]): End date string
        """
        response = self.client.search(
            query,
            num_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
            use_autoprompt=True,
        )
        self._log_autoprompt(response)
        # Only title/url/id are returned; the id can be fed to
        # `retrieve_documents` to fetch the text.
        return [
            {"title": result.title, "url": result.url, "id": result.id}
            for result in response.results
        ]

    def retrieve_documents(self, ids: List[str]) -> List[Document]:
        """
        Retrieve a list of document texts returned by `search`, using the ID field

        Args:
            ids (List(str)): the ids of the documents to retrieve
        """
        response = self.client.get_contents(ids)
        return [Document(text=result.text) for result in response.results]

    def find_similar(
        self,
        url: str,
        num_results: Optional[int] = 3,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
    ) -> List:
        """
        Retrieve a list of similar documents to a given url

        Args:
            url (str): The web page to find similar results of
            num_results (Optional[int]): Number of results to return. Default 3.
            start_published_date (Optional[str]): A date string like "2020-06-15"
            end_published_date (Optional[str]): End date string
        """
        response = self.client.find_similar(
            url,
            num_results=num_results,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
        )
        # Same title/url/id shape as `search`.
        return [
            {"title": result.title, "url": result.url, "id": result.id}
            for result in response.results
        ]

    def search_and_retrieve_documents(
        self,
        query: str,
        num_results: Optional[int] = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
    ) -> List[Document]:
        """
        Combines the functionality of `search` and `retrieve_documents`

        Args:
            query (str): the natural language query
            num_results (Optional[int]): Number of results. Defaults to 10.
            include_domains (Optional[List(str)]): A list of top level domains to search, like ["wsj.com"]
            exclude_domains (Optional[List(str)]): Top level domains to exclude.
            start_published_date (Optional[str]): A date string like "2020-06-15".
            end_published_date (Optional[str]): End date string
        """
        response = self.client.search_and_contents(
            query,
            num_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
            use_autoprompt=True,
            # Cap the text requested per result at the configured limit.
            text={"max_characters": self._max_characters},
        )
        self._log_autoprompt(response)
        return [Document(text=result.text) for result in response.results]

    def search_and_retrieve_highlights(
        self,
        query: str,
        num_results: Optional[int] = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        start_published_date: Optional[str] = None,
        end_published_date: Optional[str] = None,
    ) -> List[Document]:
        """
        Searches and retrieves highlights (intelligent snippets from the document)

        Args:
            query (str): the natural language query
            num_results (Optional[int]): Number of results. Defaults to 10.
            include_domains (Optional[List(str)]): A list of top level domains to search, like ["wsj.com"]
            exclude_domains (Optional[List(str)]): Top level domains to exclude.
            start_published_date (Optional[str]): A date string like "2020-06-15".
            end_published_date (Optional[str]): End date string
        """
        response = self.client.search_and_contents(
            query,
            num_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains,
            start_published_date=start_published_date,
            end_published_date=end_published_date,
            use_autoprompt=True,
            highlights=True,
        )
        self._log_autoprompt(response)
        # Only the first highlight of each result is used. Results whose
        # highlights are empty or None are skipped so that one page without
        # highlights does not fail the whole call with IndexError/TypeError.
        return [
            Document(text=result.highlights[0])
            for result in response.results
            if result.highlights
        ]

    def current_date(self) -> datetime.date:
        """
        A function to return today's date.
        Call this before any other functions that take timestamps as an argument
        """
        return datetime.date.today()
1 change: 1 addition & 0 deletions llama_hub/tools/exa/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
exa-py
4 changes: 4 additions & 0 deletions llama_hub/tools/library.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@
"id": "tools/metaphor",
"author": "ajhofmann"
},
"ExaToolSpec": {
"id": "tools/exa",
"author": "jeffzwang"
},
"MultionToolSpec": {
"id": "tools/multion",
"author": "ajhofmann"
Expand Down
31 changes: 1 addition & 30 deletions llama_hub/tools/metaphor/README.md
Original file line number Diff line number Diff line change
@@ -1,32 +1,3 @@
# Metaphor Tool

This tool connects to [Metaphor](https://metaphor.systems/) to easily enable
your agent to search and get HTML content from the Internet.

To begin, you need to obtain an API key on the [Metaphor developer dashboard](https://dashboard.metaphor.systems).

## Usage

This tool has more a extensive example usage documented in a Jupyter notebook [here](https://github.com/emptycrown/llama-hub/tree/main/llama_hub/tools/notebooks/metaphor.ipynb)

Here's an example usage of the MetaphorToolSpec.

```python
from llama_hub.tools.metaphor import MetaphorToolSpec
from llama_index.agent import OpenAIAgent

metaphor_tool = MetaphorToolSpec(
api_key='your-key',
)
agent = OpenAIAgent.from_tools(metaphor_tool.to_tool_list())

agent.chat('Can you summarize the news published in the last month on superconductors')
```

`search`: Search for a list of articles relating to a natural language query
`retrieve_documents`: Retrieve a list of documents returned from `metaphor_search`.
`search_and_retrieve_documents`: Combines search and retrieve_documents to directly return a list of documents related to a search
`find_similar`: Find similar documents to a given URL.
`current_date`: Utility for the Agent to get todays date

This loader is designed to be used as a way to load data as a Tool in a Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples.
**NOTE**: Metaphor is now Exa. This tool has been removed. Please use the `ExaToolSpec` instead!
Loading
Loading