Skip to content

Commit

Permalink
Add tests for epub upload and epub reading
Browse files Browse the repository at this point in the history
  • Loading branch information
BurnySc2 committed Oct 18, 2024
1 parent 84d84c3 commit f7cd0cc
Show file tree
Hide file tree
Showing 14 changed files with 181 additions and 37 deletions.
3 changes: 2 additions & 1 deletion fastapi_server/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ x-services-fragments:
[
"sh",
"-c",
"poetry run python src/workers/convert_audiobook.py && sleep 10m",
"poetry run python src/workers/convert_audiobook.py && sleep 1m",
]
env_file:
- .env
Expand Down Expand Up @@ -169,6 +169,7 @@ services:

local_dev_postgres_test:
# This container is only used for running tests
container_name: fastapi_dev_postgres_test
hostname: fastapi_dev_postgres_test
image: postgres:16-alpine
restart: unless-stopped
Expand Down
2 changes: 1 addition & 1 deletion fastapi_server/src/routes/audiobook/temp_read_epub.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def extract_sentences(text: str) -> list[str]:
def combine_text(text_as_list: list[str]) -> str:
combined_text = " ".join(row for row in text_as_list)
combined_text = re.sub(r"\s+", " ", combined_text)
return combined_text
return combined_text.strip()


class EpubChapter(BaseModel):
Expand Down
9 changes: 9 additions & 0 deletions fastapi_server/test/base_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,25 @@
from litestar.testing import TestClient
from pytest_httpx import HTTPXMock

from prisma.cli import prisma
from src.app import app
from src.routes.login_logout import COOKIES


# TODO Use one fixture that resets the db and another that doesn't
@pytest.fixture(scope="function")
def test_client() -> Iterator[TestClient[Litestar]]:
    """Yield a Litestar test client bound to ``app`` for one test function.

    The client is created with ``raise_server_exceptions=True`` and is used as
    a context manager, so its context is exited once the test has finished.
    """
    client_context = TestClient(app=app, raise_server_exceptions=True)
    with client_context as http_client:
        yield http_client


@pytest.fixture(scope="function")
def test_client_db_reset() -> Iterator[TestClient[Litestar]]:
    """Yield a Litestar test client after running ``prisma db push --force-reset``.

    The Prisma CLI call runs before the client is created, once per test
    function. ``check=True`` is passed so a failing CLI call is not ignored.
    NOTE(review): presumably this leaves every test with an empty database
    that matches the current schema - confirm against the Prisma docs.
    """
    reset_command = ["db", "push", "--force-reset"]
    prisma.run(reset_command, check=True)

    client_context = TestClient(app=app, raise_server_exceptions=True)
    with client_context as http_client:
        yield http_client


def log_in_with_twitch(test_client: TestClient, httpx_mock: HTTPXMock) -> None:
test_client.cookies[COOKIES["twitch"]] = "valid_access_token"
httpx_mock.add_response(
Expand Down
142 changes: 142 additions & 0 deletions fastapi_server/test/endpoints/audiobook/test_read_epub.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import io
from pathlib import Path

from ebooklib import epub
from hypothesis import given
from hypothesis import strategies as st
import pytest

from src.routes.audiobook.temp_read_epub import combine_text, extract_chapters, extract_metadata


def generate_epub_helper(book_title: str, book_author: str, chapters: dict[str, str]) -> io.BytesIO:
    """Build an EPUB in memory from a title, an author and a chapter mapping.

    Args:
        book_title: Title stored in the book metadata.
        book_author: First author added to the book metadata.
        chapters: Mapping of chapter title to chapter content. One HTML item
            is created per entry, in the mapping's iteration order.

    Returns:
        The ``io.BytesIO`` object that ``epub.write_epub`` wrote the book into.
    """
    ebook = epub.EpubBook()

    # Book-level metadata
    ebook.set_identifier("id123456")
    ebook.set_title(book_title)
    ebook.set_language("en")

    ebook.add_author(book_author)
    # Why is this needed?
    ebook.add_author(
        "Danko Bananko",
        file_as="Gospodin Danko Bananko",
        role="ill",
        uid="coauthor",
    )

    # One xhtml item per chapter; file names are numbered starting at 1
    chapter_items = {}
    chapter_number = 0
    for chapter_title, chapter_content in chapters.items():
        chapter_number += 1
        html_item = epub.EpubHtml(
            title=chapter_title,
            file_name=f"chap_{chapter_number:04d}.xhtml",
            lang="en",
            content=chapter_content,
        )
        ebook.add_item(html_item)
        chapter_items[chapter_title] = html_item

    # See https://github.com/aerkalov/ebooklib/
    # Table of contents
    # Why are these extra chapters needed?
    intro_link = epub.Link("intro.xhtml", "Introduction", "intro")
    languages_section = (epub.Section("Languages"), tuple(chapter_items.values()))
    ebook.toc = (intro_link, languages_section)

    # Default NCX and Nav files
    ebook.add_item(epub.EpubNcx())
    ebook.add_item(epub.EpubNav())

    output_buffer = io.BytesIO()
    epub.write_epub(output_buffer, ebook, {})
    return output_buffer


# https://stackoverflow.com/a/57754227/10882657
@given(
    book_title=st.from_regex(r"\w[\w\d\u0370-\u03FF\u0400-\u04FF_ -]*", fullmatch=True),
    book_author=st.from_regex(r"\w[\w\d\u0370-\u03FF\u0400-\u04FF_ -]*", fullmatch=True),
)
def test_epub_reader_extract_metadata(
    book_title: str,
    book_author: str,
):
    """Title and author written into a generated EPUB are read back unchanged.

    The book is generated without any chapters; only the metadata returned by
    ``extract_metadata`` is checked.
    """
    generated_book = generate_epub_helper(book_title, book_author, {})
    metadata = extract_metadata(generated_book)

    assert metadata.title == book_title
    assert metadata.author == book_author


def test_epub_reader_extract_chapters_simple():
    """A generated EPUB with two distinct chapters yields two extracted chapters."""
    two_chapters = {"asd": "asd", "asd2": "asd2"}
    generated_book = generate_epub_helper(
        book_title="test title",
        book_author="test author",
        chapters=two_chapters,
    )

    extracted = extract_chapters(generated_book)
    assert len(extracted) == 2


@given(
    chapters=st.dictionaries(
        # Chapter title
        keys=st.from_regex(r"\w[\w \n]*", fullmatch=True),
        # Chapter content
        values=st.from_regex(r"\w[\w \n]*", fullmatch=True),
        # Alternative parsing if only 1 chapter was detected
        min_size=2,
        max_size=10**4 - 1,
    ),
)
def test_epub_reader_extract_chapters(
    chapters: dict[str, str],
):
    """Generated chapters survive a write/extract round trip in order.

    The generated mapping is first filtered, then written into an EPUB.
    The extracted chapters are compared with the filtered mapping by count,
    by title, and by text normalised through ``combine_text``.
    """
    # 2 chapters following each other need to have different text.
    # Each entry is paired with its successor, so the final generated entry
    # has no partner and is never kept.
    generated_entries = list(chapters.items())
    chapters = {}
    for (title, content), (_next_title, next_content) in zip(generated_entries, generated_entries[1:]):
        if content != next_content:
            chapters[title] = content

    # Too few chapters left after filtering: nothing to check for this example
    if len(chapters) < 2:
        return

    generated_book = generate_epub_helper(
        book_title="test title",
        book_author="test author",
        chapters=chapters,
    )

    # Sanity check: Chapter count needs to be the same
    extracted_chapters = extract_chapters(generated_book)
    assert len(chapters) == len(extracted_chapters)

    # Check that chapters are identical
    for (expected_title, expected_content), extracted in zip(chapters.items(), extracted_chapters):
        assert extracted.chapter_title == expected_title
        assert extracted.combined_text == combine_text([expected_content])


@pytest.mark.parametrize(
    "book_relative_path, chapters_amount",
    [
        ("actual_books/frankenstein.epub", 31),
        ("actual_books/romeo-and-juliet.epub", 28),
        ("actual_books/the-war-of-the-worlds.epub", 29),
    ],
)
def test_parsing_real_epubs(book_relative_path: str, chapters_amount: int) -> None:  # noqa: F811
    """Each bundled EPUB yields the expected number of extracted chapters.

    The book is located relative to this test file, loaded fully into memory
    and handed to ``extract_chapters`` as a ``BytesIO``.
    """
    epub_file = Path(__file__).parent / book_relative_path
    with epub_file.open("rb") as handle:
        in_memory_book = io.BytesIO(handle.read())

    assert len(extract_chapters(in_memory_book)) == chapters_amount
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from test.base_test import log_in_with_twitch, test_client # noqa: F401
from test.base_test import log_in_with_twitch, test_client, test_client_db_reset # noqa: F401

import pytest
from bs4 import BeautifulSoup # pyre-fixme[21]
Expand All @@ -8,14 +8,10 @@
from litestar.testing import TestClient
from pytest_httpx import HTTPXMock

_test_client = test_client

from src.routes.cookies_and_guards import twitch_cache

def setup_function(function):
# prisma.run(["db", "push", "--force-reset"], check=True)
pass
# TODO Can't access db directly from test functions, but the server seems to handle it correctly
# Perhaps adding a test-endpoint to verify that data is in the database?
_test_client = test_client
_test_client_db_reset = test_client_db_reset


def test_index_route_inaccessable_when_not_logged_in(test_client: TestClient) -> None: # noqa: F811
Expand All @@ -24,80 +20,77 @@ def test_index_route_inaccessable_when_not_logged_in(test_client: TestClient) ->


# Test "/" has upload button
def test_index_route_has_upload_button(test_client: TestClient, httpx_mock: HTTPXMock) -> None: # noqa: F811
log_in_with_twitch(test_client, httpx_mock)
response = test_client.get("/audiobook/epub_upload")
def test_index_route_has_upload_button(test_client_db_reset: TestClient, httpx_mock: HTTPXMock) -> None: # noqa: F811
log_in_with_twitch(test_client_db_reset, httpx_mock)
response = test_client_db_reset.get("/audiobook/epub_upload")
assert response.status_code == HTTP_200_OK
# assert button exists with text "Upload"
soup = BeautifulSoup(response.text, features="lxml")
assert len(soup.find_all("button", type="submit")) == 1


# Test post request to "/" can upload an epub
@pytest.mark.skip(reason="broke when switched to Prisma")
@pytest.mark.parametrize(
"book_relative_path, book_id, chapters_amount",
[
("actual_books/frankenstein.epub", 1, 31),
("actual_books/romeo-and-juliet.epub", 2, 28),
("actual_books/the-war-of-the-worlds.epub", 3, 29),
("actual_books/romeo-and-juliet.epub", 1, 28),
("actual_books/the-war-of-the-worlds.epub", 1, 29),
],
)
def test_index_route_upload_epub(
book_relative_path: str, book_id: int, chapters_amount: int, test_client: TestClient, httpx_mock: HTTPXMock
@pytest.mark.httpx_mock(non_mocked_hosts=["localhost"])
@pytest.mark.asyncio
async def test_index_route_upload_epub(
book_relative_path: str, book_id: int, chapters_amount: int, test_client_db_reset: TestClient, httpx_mock: HTTPXMock
) -> None: # noqa: F811
log_in_with_twitch(test_client, httpx_mock)
await twitch_cache.delete_all()
log_in_with_twitch(test_client_db_reset, httpx_mock)

# Make sure the book does not exist yet
response = test_client.get(f"/audiobook/book/{book_id}")
response = test_client_db_reset.get(f"/audiobook/book/{book_id}")
assert response.status_code == HTTP_401_UNAUTHORIZED

# Upload book
book_path = Path(__file__).parent / book_relative_path
response = test_client.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")})
response = test_client_db_reset.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")})
assert response.status_code == HTTP_201_CREATED
# Why is the database not empty?
assert response.headers.get(HTMXHeaders.REDIRECT) == f"/audiobook/book/{book_id}"
assert response.headers.get("location") is None

# Clean up responses to avoid assertion failure
httpx_mock.reset(assert_all_responses_were_requested=False)

# Make sure N chapters were detected
response2 = test_client.get(response.headers.get(HTMXHeaders.REDIRECT))
response2 = test_client_db_reset.get(response.headers.get(HTMXHeaders.REDIRECT))
soup = BeautifulSoup(response2.text, features="lxml")
matching_divs = soup.find_all("div", id=lambda x: x is not None and x.startswith("chapter_audio_"))
assert response2.status_code == HTTP_200_OK
assert len(matching_divs) == chapters_amount


# Test post request to "/" book already exists
@pytest.mark.skip(reason="broke when switched to Prisma")
def test_index_route_upload_epub_twice(test_client: TestClient, httpx_mock: HTTPXMock) -> None: # noqa: F811
log_in_with_twitch(test_client, httpx_mock)
@pytest.mark.httpx_mock(non_mocked_hosts=["localhost"])
@pytest.mark.asyncio
async def test_index_route_upload_epub_twice(test_client_db_reset: TestClient, httpx_mock: HTTPXMock) -> None: # noqa: F811
await twitch_cache.delete_all()
log_in_with_twitch(test_client_db_reset, httpx_mock)

# Make sure the book does not exist yet
response = test_client.get("/audiobook/book/1")
# Why is this not 401?
assert response.status_code == HTTP_200_OK
response = test_client_db_reset.get("/audiobook/book/1")
assert response.status_code == HTTP_401_UNAUTHORIZED

# Upload book the first time
book_path = Path(__file__).parent / "actual_books/frankenstein.epub"
response2 = test_client.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")})
response2 = test_client_db_reset.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")})
assert response2.status_code == HTTP_201_CREATED
assert response2.headers.get(HTMXHeaders.REDIRECT) == "/audiobook/book/1"
assert response2.headers.get("location") is None

# Upload a second time
response3 = test_client.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")})
response3 = test_client_db_reset.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")})
assert response2.status_code == HTTP_201_CREATED
# Make sure it points to the same book
assert response3.headers.get(HTMXHeaders.REDIRECT) == "/audiobook/book/1"
assert response3.headers.get("location") is None

# Clean up responses to avoid assertion failure
httpx_mock.reset(assert_all_responses_were_requested=False)


# Test "/book/book_id" does not have an uploaded book

Expand Down

This file was deleted.

0 comments on commit f7cd0cc

Please sign in to comment.