From 4878f6f7966e3802822ed6e9f4e78a8787b3c73c Mon Sep 17 00:00:00 2001
From: Rajendra Kadam
Date: Sat, 31 Aug 2024 16:48:46 +0530
Subject: [PATCH] Add search api engine integration class

Add documentation and example action invoke
Add steps to perform searches on various engines using SearchApi
---
 dev/Readme.md | 1 +
 docs/components/action/tools.md | 42 +++++++++++++++---
 docs/use-cases/jobsearch-agent.md | 2 +-
 src/openagi/actions/tools/searchapi_search.py | 44 +++++++++++++++++++
 src/openagi/utils/tool_list.py | 2 +
 5 files changed, 83 insertions(+), 8 deletions(-)
 create mode 100644 src/openagi/actions/tools/searchapi_search.py

diff --git a/dev/Readme.md b/dev/Readme.md
index 8864b13..2f2f0b0 100644
--- a/dev/Readme.md
+++ b/dev/Readme.md
@@ -40,6 +40,7 @@
 ```bash
 export AZURE_OPENAI_API_KEY="" # required AZURE OPENAI USAGE
 export SERPER_API_KEY="" # required for Google Serper API
+ export SEARCHAPI_API_KEY="" # required to perform web searches on any supported search engine
 python usecases/ProfAgent.py
 ```

diff --git a/docs/components/action/tools.md b/docs/components/action/tools.md
index 5a4d28c..1733465 100644
--- a/docs/components/action/tools.md
+++ b/docs/components/action/tools.md
@@ -25,7 +25,35 @@ admin = Admin(
 )
 ```

-### 2. Serper Search Tool
+### 2. SearchApi Search
+
+[SearchApi.io](https://searchapi.io/) provides a real-time API to access search results from Google (default), Google Scholar, Bing, Baidu, and other search engines. Any existing or upcoming SERP engine that returns `organic_results` is supported. The default web search engine is `google`, but it can be changed to `bing`, `baidu`, `google_news`, `bing_news`, `google_scholar`, `google_patents`, and others.
+
+#### Setup API
+
+```python
+import os
+
+os.environ['SEARCHAPI_API_KEY'] = ""
+os.environ['SEARCHAPI_ENGINE'] = "bing" # defaults to google.
+```
+
+Get your API key by creating an account or logging in at [SearchApi](https://searchapi.io/).
+
+```python
+from openagi.actions.tools.searchapi_search import SearchApiSearch
+from openagi.agent import Admin
+from openagi.llms.openai import OpenAIModel
+from openagi.planner.task_decomposer import TaskPlanner
+
+admin = Admin(
+    llm = llm,
+    actions=[SearchApiSearch],
+    planner=TaskPlanner(),
+)
+```
+
+### 3. Serper Search Tool

 Serper is a low-cost Google Search API that can be used to add answer box, knowledge graph, and organic results data from Google Search. This tool is mainly helps user to query the Google results with less throughput and latency.

@@ -52,7 +80,7 @@ admin = Admin(
 )
 ```

-### 3. Google Serp API Search
+### 4. Google Serp API Search

 Serp API is yet another solution to integrate search data. SERP stands for _Search Engine Results Page_. It refers to the page displayed by a search engine in response to a user's query.

@@ -79,7 +107,7 @@ admin = Admin(
 )
 ```

-### 4. Github Search Tool
+### 5. Github Search Tool

 The Github SearchTool is used for retrieving information from Github repositories using natural language queries. This tool provides functionality for querying Github repositories for various information, such as code changes, commits, active pull requests, issues, etc., using natural language input. It is designed to be used as part of a larger AI-driven agent system.

@@ -106,7 +134,7 @@ admin = Admin(
 )
 ```

-### 5. YouTube Search Tool
+### 6. YouTube Search Tool

 The YouTube Search tool allows users to search for videos on YouTube using natural language queries.
This tool retrieves relevant video content based on user-defined search parameters, making it easier to find specific videos or topics of interest.

@@ -132,7 +160,7 @@ admin = Admin(
 )
 ```

-### 6. Tavily QA Search Tool
+### 7. Tavily QA Search Tool

 The Tavily QA Search tool is designed to provide answers to user queries by fetching data from various online sources. This tool enhances the capability of the agent to retrieve precise information and answer questions effectively.

@@ -166,7 +194,7 @@ admin = Admin(
 )
 ```

-### 7. Exa Search Tool
+### 8. Exa Search Tool

 The Exa Search tool allows users to query the Exa API to retrieve relevant responses based on user-defined questions. This tool is particularly useful for extracting information and insights from various data sources using natural language queries.

@@ -202,7 +230,7 @@ admin = Admin(
 )
 ```

-### 8. Unstructured PDF Loader Tool
+### 9. Unstructured PDF Loader Tool

 The Unstructured PDF Loader tool is designed to extract content, including metadata, from PDF files. It utilizes the Unstructured library to partition the PDF and chunk the content based on titles. This tool is useful for processing large volumes of PDF documents and making their contents accessible for further analysis.

diff --git a/docs/use-cases/jobsearch-agent.md b/docs/use-cases/jobsearch-agent.md
index 0eb5b6a..b8a8101 100644
--- a/docs/use-cases/jobsearch-agent.md
+++ b/docs/use-cases/jobsearch-agent.md
@@ -6,7 +6,7 @@ It utilize various tools for internet search and document comparison to fulfill

 First, we need to import the necessary modules. Each module serves a specific purpose in our script. We utilize various tools for internet search and document comparison to fulfill the agent's task. Here’s what each import does:

-* `GoogleSerpAPISearch` and `DuckDuckGoSearch` are tools for performing web searches.
+* `SearchApiSearch`, `GoogleSerpAPISearch` and `DuckDuckGoSearch` are tools for performing web searches.
 * `Admin` manages the overall execution of tasks.
 * `AzureChatOpenAIModel` is used to configure the large language model from Azure.
 * `Memory` is for maintaining context during the agent's operations.
diff --git a/src/openagi/actions/tools/searchapi_search.py b/src/openagi/actions/tools/searchapi_search.py
new file mode 100644
index 0000000..42bd635
--- /dev/null
+++ b/src/openagi/actions/tools/searchapi_search.py
@@ -0,0 +1,44 @@
+import logging
+import os
+import requests
+from urllib.parse import urlencode
+from typing import Any
+
+from pydantic import Field
+
+from openagi.actions.base import BaseAction
+from openagi.exception import OpenAGIException
+
+
+class SearchApiSearch(BaseAction):
+    """SearchApi.io provides a real-time API to access search results from Google (default), Google Scholar, Bing, Baidu, and other search engines."""
+    query: str = Field(
+        ..., description="User query of type string used to fetch web search results from a search engine."
+    )
+
+    def execute(self):
+        base_url = "https://www.searchapi.io/api/v1/search"
+        searchapi_api_key = os.environ["SEARCHAPI_API_KEY"]
+        engine = os.environ.get("SEARCHAPI_ENGINE") or "google"
+        search_dict = {
+            "q": self.query,
+            "engine": engine,
+            "api_key": searchapi_api_key
+        }
+        logging.debug(f"{search_dict=}")
+        url = f"{base_url}?{urlencode(search_dict)}"
+        response = requests.request("GET", url)
+        json_response = response.json()
+
+        if not json_response:
+            raise OpenAGIException(f"Unable to generate result for the query {self.query}")
+
+        logging.debug(json_response)
+
+        organic_results = json_response.get("organic_results", [])
+
+        meta_data = ""
+        for organic_result in organic_results:
+            meta_data += f"CONTEXT: {organic_result['title']} \\ {organic_result['snippet']}\n"
+            meta_data += f"Reference URL: {organic_result['link']}\n"
+        return meta_data
diff --git a/src/openagi/utils/tool_list.py b/src/openagi/utils/tool_list.py
index 8127876..0176e83 100644
--- a/src/openagi/utils/tool_list.py
+++ b/src/openagi/utils/tool_list.py
@@ -2,6 +2,7 @@ from openagi.actions.tools import (
     ddg_search,
     document_loader,
+    searchapi_search,
     serp_search,
     serper_search,
     webloader,
@@ -14,6 +15,7 @@ modules = [
     document_loader,
     ddg_search,
+    searchapi_search,
     serp_search,
     serper_search,
     webloader,