Convert Metaphor documentation to Exa (#896)

run-llama · Jan 26, 2024 · f41dab0 · f41dab0
1 parent 57af091
commit f41dab0
Show file tree

Hide file tree

Showing 10 changed files with 824 additions and 599 deletions.
diff --git a/llama_hub/tools/exa/README.md b/llama_hub/tools/exa/README.md
@@ -0,0 +1,32 @@
+# Exa (formerly Metaphor) Tool 
+
+This tool connects to [Exa](https://exa.ai/) to easily enable
+your agent to search and get HTML content from the Internet.
+
+To begin, you need to obtain an API key on the [Exa developer dashboard](https://dashboard.exa.ai).
+
+## Usage
+
+This tool has a more extensive example usage documented in a Jupyter notebook [here](https://github.com/emptycrown/llama-hub/tree/main/llama_hub/tools/notebooks/exa.ipynb)
+
+Here's an example usage of the ExaToolSpec.
+
+```python
+from llama_hub.tools.exa import ExaToolSpec
+from llama_index.agent import OpenAIAgent
+
+exa_tool = ExaToolSpec(
+    api_key='your-key',
+)
+agent = OpenAIAgent.from_tools(exa_tool.to_tool_list())
+
+agent.chat('Can you summarize the news published in the last month on superconductors')
+```
+
+- `search`: Search for a list of articles relating to a natural language query
+- `retrieve_documents`: Retrieve a list of documents returned from `search`.
+- `search_and_retrieve_documents`: Combines search and retrieve_documents to directly return a list of documents related to a search
+- `search_and_retrieve_highlights`: Search and directly return highlights (snippets) from the matching documents
+- `find_similar`: Find similar documents to a given URL.
+- `current_date`: Utility for the Agent to get today's date
+
+This loader is designed to be used as a way to load data as a Tool in an Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples.
diff --git a/llama_hub/tools/exa/__init__.py b/llama_hub/tools/exa/__init__.py
@@ -0,0 +1,6 @@
+## init
+from llama_hub.tools.exa.base import (
+    ExaToolSpec,
+)
+
+__all__ = ["ExaToolSpec"]
diff --git a/llama_hub/tools/exa/base.py b/llama_hub/tools/exa/base.py
@@ -0,0 +1,183 @@
+"""Exa (formerly Metaphor) tool spec."""
+
+import datetime
+from typing import List, Optional
+
+from llama_index.readers.schema.base import Document
+from llama_index.tools.tool_spec.base import BaseToolSpec
+
+
+class ExaToolSpec(BaseToolSpec):
+    """Exa tool spec.
+
+    Wraps an `exa_py.Exa` client and exposes search, content retrieval,
+    find-similar and a current-date helper as the functions listed in
+    `spec_functions`.
+    """
+
+    spec_functions = [
+        "search",
+        "retrieve_documents",
+        "search_and_retrieve_documents",
+        "search_and_retrieve_highlights",
+        "find_similar",
+        "current_date",
+    ]
+
+    def __init__(
+        self,
+        api_key: str,
+        verbose: bool = True,
+        max_characters: int = 2000,
+    ) -> None:
+        """Initialize with parameters.
+
+        Args:
+            api_key (str): API key handed to the Exa client.
+            verbose (bool): When True, the autoprompt string of each search response is printed. Defaults to True.
+            max_characters (int): Max characters of text requested per result in `search_and_retrieve_documents`. Defaults to 2000.
+        """
+        # Imported lazily so the module can be imported without exa_py installed.
+        from exa_py import Exa
+
+        self.client = Exa(api_key=api_key, user_agent="llama-index")
+        self._verbose = verbose
+        # max characters for the text field in the search_and_contents function
+        self._max_characters = max_characters
+
+    def search(
+        self,
+        query: str,
+        num_results: Optional[int] = 10,
+        include_domains: Optional[List[str]] = None,
+        exclude_domains: Optional[List[str]] = None,
+        start_published_date: Optional[str] = None,
+        end_published_date: Optional[str] = None,
+    ) -> List[dict]:
+        """
+        Exa allows you to use a natural language query to search the internet
+
+        Args:
+            query (str): A natural language query phrased as an answer for what the link provides, e.g.: "This is the latest news about space:"
+            num_results (Optional[int]): Number of results to return. Defaults to 10.
+            include_domains (Optional[List(str)]): A list of top level domains like ["wsj.com"] to limit the search to specific sites.
+            exclude_domains (Optional[List(str)]): Top level domains to exclude.
+            start_published_date (Optional[str]): A date string like "2020-06-15". Get the date from `current_date`
+            end_published_date (Optional[str]): End date string
+
+        Returns:
+            A list of dicts with the "title", "url" and "id" of each result.
+        """
+        response = self.client.search(
+            query,
+            num_results=num_results,
+            include_domains=include_domains,
+            exclude_domains=exclude_domains,
+            start_published_date=start_published_date,
+            end_published_date=end_published_date,
+            use_autoprompt=True,
+        )
+        if self._verbose:
+            print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
+        return [
+            {"title": result.title, "url": result.url, "id": result.id}
+            for result in response.results
+        ]
+
+    def retrieve_documents(self, ids: List[str]) -> List[Document]:
+        """
+        Retrieve a list of document texts returned by `search`, using the ID field
+
+        Args:
+            ids (List(str)): the ids of the documents to retrieve
+
+        Returns:
+            One Document per retrieved result, holding the result's text.
+        """
+
+        response = self.client.get_contents(ids)
+        return [Document(text=result.text) for result in response.results]
+
+    def find_similar(
+        self,
+        url: str,
+        num_results: Optional[int] = 3,
+        start_published_date: Optional[str] = None,
+        end_published_date: Optional[str] = None,
+    ) -> List[dict]:
+        """
+        Retrieve a list of similar documents to a given url
+
+        Args:
+            url (str): The web page to find similar results of
+            num_results (Optional[int]): Number of results to return. Default 3.
+            start_published_date (Optional[str]): A date string like "2020-06-15"
+            end_published_date (Optional[str]): End date string
+
+        Returns:
+            A list of dicts with the "title", "url" and "id" of each result.
+        """
+        response = self.client.find_similar(
+            url,
+            num_results=num_results,
+            start_published_date=start_published_date,
+            end_published_date=end_published_date,
+        )
+        return [
+            {"title": result.title, "url": result.url, "id": result.id}
+            for result in response.results
+        ]
+
+    def search_and_retrieve_documents(
+        self,
+        query: str,
+        num_results: Optional[int] = 10,
+        include_domains: Optional[List[str]] = None,
+        exclude_domains: Optional[List[str]] = None,
+        start_published_date: Optional[str] = None,
+        end_published_date: Optional[str] = None,
+    ) -> List[Document]:
+        """
+        Combines the functionality of `search` and `retrieve_documents`
+
+        Args:
+            query (str): the natural language query
+            num_results (Optional[int]): Number of results. Defaults to 10.
+            include_domains (Optional[List(str)]): A list of top level domains to search, like ["wsj.com"]
+            exclude_domains (Optional[List(str)]): Top level domains to exclude.
+            start_published_date (Optional[str]): A date string like "2020-06-15".
+            end_published_date (Optional[str]): End date string
+
+        Returns:
+            One Document per search result, holding the result's text.
+        """
+        response = self.client.search_and_contents(
+            query,
+            num_results=num_results,
+            include_domains=include_domains,
+            exclude_domains=exclude_domains,
+            start_published_date=start_published_date,
+            end_published_date=end_published_date,
+            use_autoprompt=True,
+            # Request text capped at the limit configured in __init__.
+            text={"max_characters": self._max_characters},
+        )
+        if self._verbose:
+            print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
+        return [Document(text=document.text) for document in response.results]
+
+    def search_and_retrieve_highlights(
+        self,
+        query: str,
+        num_results: Optional[int] = 10,
+        include_domains: Optional[List[str]] = None,
+        exclude_domains: Optional[List[str]] = None,
+        start_published_date: Optional[str] = None,
+        end_published_date: Optional[str] = None,
+    ) -> List[Document]:
+        """
+        Searches and retrieves highlights (intelligent snippets from the document)
+
+        Args:
+            query (str): the natural language query
+            num_results (Optional[int]): Number of results. Defaults to 10.
+            include_domains (Optional[List(str)]): A list of top level domains to search, like ["wsj.com"]
+            exclude_domains (Optional[List(str)]): Top level domains to exclude.
+            start_published_date (Optional[str]): A date string like "2020-06-15".
+            end_published_date (Optional[str]): End date string
+
+        Returns:
+            One Document per search result that has at least one highlight,
+            holding the first highlight. Results without highlights are skipped.
+        """
+        response = self.client.search_and_contents(
+            query,
+            num_results=num_results,
+            include_domains=include_domains,
+            exclude_domains=exclude_domains,
+            start_published_date=start_published_date,
+            end_published_date=end_published_date,
+            use_autoprompt=True,
+            highlights=True,
+        )
+        if self._verbose:
+            print(f"[Exa Tool] Autoprompt: {response.autoprompt_string}")
+        # Guard against results whose highlights are empty or missing: indexing
+        # [0] on those would raise and discard every other result of the search.
+        return [
+            Document(text=document.highlights[0])
+            for document in response.results
+            if getattr(document, "highlights", None)
+        ]
+
+    def current_date(self) -> datetime.date:
+        """
+        A function to return today's date.
+        Call this before any other functions that take timestamps as an argument
+        """
+        return datetime.date.today()
diff --git a/llama_hub/tools/exa/requirements.txt b/llama_hub/tools/exa/requirements.txt
@@ -0,0 +1 @@
+exa-py
diff --git a/llama_hub/tools/library.json b/llama_hub/tools/library.json
@@ -55,6 +55,10 @@
     "id": "tools/metaphor",
     "author": "ajhofmann"
   },
+  "ExaToolSpec": {
+    "id": "tools/exa",
+    "author": "jeffzwang"
+  },
   "MultionToolSpec": {
     "id": "tools/multion",
     "author": "ajhofmann"

diff --git a/llama_hub/tools/metaphor/README.md b/llama_hub/tools/metaphor/README.md
@@ -1,32 +1,3 @@
 # Metaphor Tool
 
-This tool connects to [Metaphor](https://metaphor.systems/) to easily enable
-your agent to search and get HTML content from the Internet.
-
-To begin, you need to obtain an API key on the [Metaphor developer dashboard](https://dashboard.metaphor.systems).
-
-## Usage
-
-This tool has more a extensive example usage documented in a Jupyter notebook [here](https://github.com/emptycrown/llama-hub/tree/main/llama_hub/tools/notebooks/metaphor.ipynb)
-
-Here's an example usage of the MetaphorToolSpec.
-
-```python
-from llama_hub.tools.metaphor import MetaphorToolSpec
-from llama_index.agent import OpenAIAgent
-
-metaphor_tool = MetaphorToolSpec(
-    api_key='your-key',
-)
-agent = OpenAIAgent.from_tools(metaphor_tool.to_tool_list())
-
-agent.chat('Can you summarize the news published in the last month on superconductors')
-```
-
-`search`: Search for a list of articles relating to a natural language query
-`retrieve_documents`: Retrieve a list of documents returned from `metaphor_search`.
-`search_and_retrieve_documents`: Combines search and retrieve_documents to directly return a list of documents related to a search
-`find_similar`: Find similar documents to a given URL.
-`current_date`: Utility for the Agent to get todays date
-
-This loader is designed to be used as a way to load data as a Tool in a Agent. See [here](https://github.com/emptycrown/llama-hub/tree/main) for examples.
+**NOTE**: Metaphor is now Exa. This tool has been removed. Please use the `ExaToolSpec` instead!