Skip to content

Commit

Permalink
feature: add initial wikipedia search tool (#344)
Browse files Browse the repository at this point in the history
* feat: add initial wikipedia search tool

Signed-off-by: AngeloDanducci <[email protected]>

* fix: allow optional full text, section titles, other languages

Signed-off-by: AngeloDanducci <[email protected]>

* refactor(tools): removes agent use in wikipedia example

Signed-off-by: AngeloDanducci <[email protected]>

---------

Signed-off-by: AngeloDanducci <[email protected]>
  • Loading branch information
AngeloDanducci authored Feb 25, 2025
1 parent 765a4ba commit 8bed3f7
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 9 deletions.
3 changes: 2 additions & 1 deletion python/beeai_framework/tools/search/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,6 @@
# manually defined import order is important here to avoid circular imports
from beeai_framework.tools.search.base import SearchToolResult, SearchToolOutput
from beeai_framework.tools.search.duckduckgo import DuckDuckGoSearchTool
from beeai_framework.tools.search.wikipedia import WikipediaTool

__all__ = ["DuckDuckGoSearchTool", "SearchToolOutput", "SearchToolResult"]
__all__ = ["DuckDuckGoSearchTool", "SearchToolOutput", "SearchToolResult", "WikipediaTool"]
63 changes: 56 additions & 7 deletions python/beeai_framework/tools/search/wikipedia.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,65 @@
# limitations under the License.


from typing import Any

import wikipediaapi
from pydantic import BaseModel, Field

from beeai_framework.tools.search import SearchToolOutput, SearchToolResult
from beeai_framework.tools.tool import Tool


class WikipediaTool(Tool):
# Input model for WikipediaTool: the page to look up plus flags selecting what text is returned.
class WikipediaToolInput(BaseModel):
# Required; passed as-is to the Wikipedia client's page() lookup.
query: str = Field(description="Search query, name of the Wikipedia page.")
# When true (and section_titles is false), the result description is the page's full text.
full_text: bool = Field(description="If set to true will return the full text of the page.", default=False)
# When true, the result description is the comma-joined section titles; checked before full_text.
section_titles: bool = Field(description="If set to true returns section titles as the description.", default=False)
# Language code looked up in the page's langlinks; if absent there, the originally fetched page is used.
language: str | None = Field(description="Retrieves specified language version if available.", default=None)


# Single Wikipedia result; adds nothing to SearchToolResult beyond a distinct type name.
class WikipediaToolResult(SearchToolResult):
pass


# Output container returned by WikipediaTool; adds nothing to SearchToolOutput beyond a distinct type name.
class WikipediaToolOutput(SearchToolOutput):
pass


class WikipediaTool(Tool[WikipediaToolInput]):
name = "Wikipedia"
description = "Search factual and historical information, including biography, history, politics, geography, society, culture, science, technology, people, animal species, mathematics, and other subjects." # noqa: E501
description = "Search factual and historical information, including biography, \
history, politics, geography, society, culture, science, technology, people, \
animal species, mathematics, and other subjects."
input_schema = WikipediaToolInput
client = wikipediaapi.Wikipedia(
user_agent="beeai-framework https://github.com/i-am-bee/beeai-framework", language="en"
)

def get_section_titles(self, sections: "list[wikipediaapi.WikipediaPageSection]") -> str:
    """Return the titles of ``sections`` joined by commas.

    Args:
        sections: Section objects of a Wikipedia page. Only the ``title``
            attribute of each section is read.

    Returns:
        The titles, each converted with ``str``, joined by "," in the
        order given. An empty string when ``sections`` is empty.
    """
    # The annotation is quoted so it is not evaluated when the method is defined.
    return ",".join(str(section.title) for section in sections)

def _run(self, input: WikipediaToolInput, _: Any | None = None) -> WikipediaToolOutput:
page_py = self.client.page(input.query)

if page_py.exists():
if input.language is not None and input.language in page_py.langlinks:
page_py = page_py.langlinks[input.language]

def input_schema(self) -> str:
# TODO: remove hard code
return '{"type":"object","properties":{"query":{"type":"string","format":"date","description":"Name of the wikipedia page, for example \'New York\'"}}}' # noqa: E501
if input.section_titles:
description_output = self.get_section_titles(page_py.sections)
elif input.full_text:
description_output = page_py.text
else:
description_output = page_py.summary

def _run(self) -> None:
pass
search_results: list[WikipediaToolResult] = [
WikipediaToolResult(
title=input.query or "", description=description_output or "", url=page_py.fullurl or ""
)
]
return WikipediaToolOutput(search_results)
else:
raise Exception(f"No Wikipedia page matched the search term: {input.query}.")
27 changes: 27 additions & 0 deletions python/docs/tools.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,33 @@ if __name__ == "__main__":

_Source: [examples/tools/openmeteo.py](/examples/tools/openmeteo.py)_

### Usage with Wikipedia

<!-- embedme examples/tools/wikipedia.py -->

```py
import asyncio

from beeai_framework.tools.search.wikipedia import (
WikipediaTool,
WikipediaToolInput,
)


async def main() -> None:
    """Look up the "bee" Wikipedia page and print its full text."""
    wikipedia_client = WikipediaTool()
    # full_text is a field of WikipediaToolInput (the value the tool reads per call),
    # not a constructor argument of WikipediaTool.
    tool_input = WikipediaToolInput(query="bee", full_text=True)
    result = wikipedia_client.run(tool_input)
    print(result.get_text_content())


if __name__ == "__main__":
asyncio.run(main())

```

_Source: [examples/tools/wikipedia.py](/examples/tools/wikipedia.py)_

## Writing a new tool

To create a new tool, you have the following options on how to do that:
Expand Down
17 changes: 17 additions & 0 deletions python/examples/tools/wikipedia.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import asyncio

from beeai_framework.tools.search.wikipedia import (
WikipediaTool,
WikipediaToolInput,
)


async def main() -> None:
    """Look up the "bee" Wikipedia page and print its full text."""
    wikipedia_client = WikipediaTool()
    # full_text is a field of WikipediaToolInput (the value the tool reads per call),
    # not a constructor argument of WikipediaTool.
    tool_input = WikipediaToolInput(query="bee", full_text=True)
    result = wikipedia_client.run(tool_input)
    print(result.get_text_content())


if __name__ == "__main__":
asyncio.run(main())
16 changes: 15 additions & 1 deletion python/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ wikipedia = "^1.4.0"
mcp = "^1.2.0"
duckduckgo-search = "^7.3.2"
json-repair = "^0.39.0"
wikipedia-api = "^0.8.1"

[tool.poetry.group.dev.dependencies]
pytest = "^8.3.4"
Expand Down
62 changes: 62 additions & 0 deletions python/tests/tools/test_wikipedia.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Copyright 2025 IBM Corp.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import pytest

from beeai_framework.tools import ToolInputValidationError
from beeai_framework.tools.search.wikipedia import (
WikipediaTool,
WikipediaToolInput,
WikipediaToolOutput,
)


# Provides a freshly constructed WikipediaTool (no arguments) to each test.
@pytest.fixture
def tool() -> WikipediaTool:
return WikipediaTool()


# A dict input using the key "search" instead of the schema's "query" field
# is expected to raise ToolInputValidationError.
@pytest.mark.e2e
def test_call_invalid_input_type(tool: WikipediaTool) -> None:
with pytest.raises(ToolInputValidationError):
tool.run(input={"search": "Bee"})


# With only a query set, the output must be exactly a WikipediaToolOutput whose text
# contains the expected opening sentence about bees.
# NOTE(review): the asserted sentence presumably comes from the live article — confirm it is stable.
@pytest.mark.e2e
def test_output(tool: WikipediaTool) -> None:
result = tool.run(input=WikipediaToolInput(query="bee"))
assert type(result) is WikipediaToolOutput
assert "Bees are winged insects closely related to wasps and ants" in result.get_text_content()


# With full_text=True the output text must contain the string "n-triscosane".
# NOTE(review): presumably this string appears only in the article body, not the summary — confirm.
@pytest.mark.e2e
def test_full_text_output(tool: WikipediaTool) -> None:
result = tool.run(input=WikipediaToolInput(query="bee", full_text=True))
assert type(result) is WikipediaToolOutput
assert "n-triscosane" in result.get_text_content()


# With section_titles=True the output text must include the section title "Characteristics".
@pytest.mark.e2e
def test_section_titles(tool: WikipediaTool) -> None:
result = tool.run(input=WikipediaToolInput(query="bee", section_titles=True))
assert type(result) is WikipediaToolOutput
assert "Characteristics" in result.get_text_content()


# With language="fr" the output text must contain the expected French sentence about bees.
@pytest.mark.e2e
def test_alternate_language(tool: WikipediaTool) -> None:
result = tool.run(input=WikipediaToolInput(query="bee", language="fr"))
assert type(result) is WikipediaToolOutput
assert "Les abeilles (Anthophila) forment un clade d'insectes" in result.get_text_content()

0 comments on commit 8bed3f7

Please sign in to comment.