Add tests for epub upload and epub reading

BurnySc2 · Oct 18, 2024 · f7cd0cc · f7cd0cc
1 parent 84d84c3
commit f7cd0cc
Show file tree

Hide file tree

Showing 14 changed files with 181 additions and 37 deletions.
diff --git a/fastapi_server/docker-compose.yml b/fastapi_server/docker-compose.yml
@@ -16,7 +16,7 @@ x-services-fragments:
       [
         "sh",
         "-c",
-        "poetry run python src/workers/convert_audiobook.py && sleep 10m",
+        "poetry run python src/workers/convert_audiobook.py && sleep 1m",
       ]
     env_file:
       - .env
@@ -169,6 +169,7 @@ services:
 
   local_dev_postgres_test:
     # This container is only used for running tests
+    container_name: fastapi_dev_postgres_test
     hostname: fastapi_dev_postgres_test
     image: postgres:16-alpine
     restart: unless-stopped

diff --git a/fastapi_server/src/routes/audiobook/temp_read_epub.py b/fastapi_server/src/routes/audiobook/temp_read_epub.py
@@ -20,7 +20,7 @@ def extract_sentences(text: str) -> list[str]:
 def combine_text(text_as_list: list[str]) -> str:
     combined_text = " ".join(row for row in text_as_list)
     combined_text = re.sub(r"\s+", " ", combined_text)
-    return combined_text
+    return combined_text.strip()
 
 
 class EpubChapter(BaseModel):

diff --git a/fastapi_server/test/base_test.py b/fastapi_server/test/base_test.py
@@ -5,16 +5,25 @@
 from litestar.testing import TestClient
 from pytest_httpx import HTTPXMock
 
+from prisma.cli import prisma
 from src.app import app
 from src.routes.login_logout import COOKIES
 
 
+# TODO Use one fixture that resets db and another that doesnt
 @pytest.fixture(scope="function")
 def test_client() -> Iterator[TestClient[Litestar]]:
     with TestClient(app=app, raise_server_exceptions=True) as client:
         yield client
 
 
+@pytest.fixture(scope="function")
+def test_client_db_reset() -> Iterator[TestClient[Litestar]]:
+    prisma.run(["db", "push", "--force-reset"], check=True)
+    with TestClient(app=app, raise_server_exceptions=True) as client:
+        yield client
+
+
 def log_in_with_twitch(test_client: TestClient, httpx_mock: HTTPXMock) -> None:
     test_client.cookies[COOKIES["twitch"]] = "valid_access_token"
     httpx_mock.add_response(

diff --git a/.../audiobook/actual_books/frankenstein.epub → .../audiobook/actual_books/frankenstein.epub b/.../audiobook/actual_books/frankenstein.epub → .../audiobook/actual_books/frankenstein.epub
diff --git a/...iobook/actual_books/romeo-and-juliet.epub → ...iobook/actual_books/romeo-and-juliet.epub b/...iobook/actual_books/romeo-and-juliet.epub → ...iobook/actual_books/romeo-and-juliet.epub
diff --git a/...k/actual_books/the-war-of-the-worlds.epub → ...k/actual_books/the-war-of-the-worlds.epub b/...k/actual_books/the-war-of-the-worlds.epub → ...k/actual_books/the-war-of-the-worlds.epub
diff --git a/fastapi_server/test/endpoints/audiobook/test_read_epub.py b/fastapi_server/test/endpoints/audiobook/test_read_epub.py
@@ -0,0 +1,142 @@
+import io
+from pathlib import Path
+
+from ebooklib import epub
+from hypothesis import given
+from hypothesis import strategies as st
+import pytest
+
+from src.routes.audiobook.temp_read_epub import combine_text, extract_chapters, extract_metadata
+
+
+def generate_epub_helper(book_title: str, book_author: str, chapters: dict[str, str]) -> io.BytesIO:
+    book = epub.EpubBook()
+
+    # set metadata
+    book.set_identifier("id123456")
+    book.set_title(book_title)
+    book.set_language("en")
+
+    book.add_author(book_author)
+    # Why is this needed?
+    book.add_author(
+        "Danko Bananko",
+        file_as="Gospodin Danko Bananko",
+        role="ill",
+        uid="coauthor",
+    )
+
+    # create chapter
+    created_chapters = {}
+    for chapter_id, (chapter_title, chapter_content) in enumerate(chapters.items(), start=1):
+        c1 = epub.EpubHtml(
+            file_name=f"chap_{chapter_id:04d}.xhtml",
+            title=chapter_title,
+            content=chapter_content,
+            lang="en",
+        )
+
+        # add chapter
+        book.add_item(c1)
+        created_chapters[chapter_title] = c1
+
+    # See https://github.com/aerkalov/ebooklib/
+    # define Table Of Contents
+    book.toc = (
+        # Why are these extra chapter needed?
+        epub.Link("intro.xhtml", "Introduction", "intro"),
+        (epub.Section("Languages"), tuple(created_chapters.values())),
+    )
+
+    # add default NCX and Nav file
+    book.add_item(epub.EpubNcx())
+    book.add_item(epub.EpubNav())
+
+    book_in_memory = io.BytesIO()
+    epub.write_epub(book_in_memory, book, {})
+    return book_in_memory
+
+
+# https://stackoverflow.com/a/57754227/10882657
+@given(
+    book_title=st.from_regex(r"\w[\w\d\u0370-\u03FF\u0400-\u04FF_ -]*", fullmatch=True),
+    book_author=st.from_regex(r"\w[\w\d\u0370-\u03FF\u0400-\u04FF_ -]*", fullmatch=True),
+)
+def test_epub_reader_extract_metadata(
+    book_title: str,
+    book_author: str,
+):
+    epub_book = generate_epub_helper(
+        book_title=book_title,
+        book_author=book_author,
+        chapters={},
+    )
+    book_metadata = extract_metadata(epub_book)
+    assert book_metadata.title == book_title
+    assert book_metadata.author == book_author
+
+
+def test_epub_reader_extract_chapters_simple():
+    epub_book = generate_epub_helper(
+        book_title="test title",
+        book_author="test author",
+        chapters={"asd": "asd", "asd2": "asd2"},
+    )
+    book_chapters = extract_chapters(epub_book)
+    assert len(book_chapters) == 2
+
+
+@given(
+    chapters=st.dictionaries(
+        # Chapter title
+        keys=st.from_regex(r"\w[\w \n]*", fullmatch=True),
+        # Chapter content
+        values=st.from_regex(r"\w[\w \n]*", fullmatch=True),
+        # Alternative parsing if only 1 chapter was detected
+        min_size=2,
+        max_size=10**4 - 1,
+    ),
+)
+def test_epub_reader_extract_chapters(
+    chapters: dict[str, str],
+):
+    # 2 chapters following each other need to have different text
+    chapters = {
+        chapter_title: chapter_content
+        for (chapter_title, chapter_content), (_chapter_title2, chapter_content2) in zip(
+            chapters.items(), list(chapters.items())[1:]
+        )
+        if chapter_content != chapter_content2
+    }
+    if len(chapters) < 2:
+        return
+
+    epub_book = generate_epub_helper(
+        book_title="test title",
+        book_author="test author",
+        chapters=chapters,
+    )
+
+    # Sanity check: Chapter count needs to be the same
+    book_chapters = extract_chapters(epub_book)
+    assert len(chapters) == len(book_chapters)
+
+    # Check that chapters are identical
+    for real_chapter, (expected_chapter_title, expected_chapter_content) in zip(book_chapters, chapters.items()):
+        assert real_chapter.chapter_title == expected_chapter_title
+        assert real_chapter.combined_text == combine_text([expected_chapter_content])
+
+
+@pytest.mark.parametrize(
+    "book_relative_path, chapters_amount",
+    [
+        ("actual_books/frankenstein.epub", 31),
+        ("actual_books/romeo-and-juliet.epub", 28),
+        ("actual_books/the-war-of-the-worlds.epub", 29),
+    ],
+)
+def test_parsing_real_epubs(book_relative_path: str, chapters_amount: int) -> None:  # noqa: F811
+    book_path = Path(__file__).parent / book_relative_path
+    book_bytes_io = io.BytesIO(book_path.read_bytes())
+    chapters_extracted = extract_chapters(book_bytes_io)
+    assert len(chapters_extracted) == chapters_amount
diff --git a/..._test/audiobook/test_route_upload_epub.py → ...oints/audiobook/test_route_upload_epub.py b/..._test/audiobook/test_route_upload_epub.py → ...oints/audiobook/test_route_upload_epub.py
@@ -1,5 +1,5 @@
 from pathlib import Path
-from test.base_test import log_in_with_twitch, test_client  # noqa: F401
+from test.base_test import log_in_with_twitch, test_client, test_client_db_reset  # noqa: F401
 
 import pytest
 from bs4 import BeautifulSoup  # pyre-fixme[21]
@@ -8,14 +8,10 @@
 from litestar.testing import TestClient
 from pytest_httpx import HTTPXMock
 
-_test_client = test_client
-
+from src.routes.cookies_and_guards import twitch_cache
 
-def setup_function(function):
-    # prisma.run(["db", "push", "--force-reset"], check=True)
-    pass
-    # TODO Can't access db directly from test functions, but the server seems to handle it correctly
-    # Perhaps adding a test-endpoint to verify that data is in the database?
+_test_client = test_client
+_test_client_db_reset = test_client_db_reset
 
 
 def test_index_route_inaccessable_when_not_logged_in(test_client: TestClient) -> None:  # noqa: F811
@@ -24,80 +20,77 @@ def test_index_route_inaccessable_when_not_logged_in(test_client: TestClient) ->
 
 
 # Test "/" has upload button
-def test_index_route_has_upload_button(test_client: TestClient, httpx_mock: HTTPXMock) -> None:  # noqa: F811
-    log_in_with_twitch(test_client, httpx_mock)
-    response = test_client.get("/audiobook/epub_upload")
+def test_index_route_has_upload_button(test_client_db_reset: TestClient, httpx_mock: HTTPXMock) -> None:  # noqa: F811
+    log_in_with_twitch(test_client_db_reset, httpx_mock)
+    response = test_client_db_reset.get("/audiobook/epub_upload")
     assert response.status_code == HTTP_200_OK
     # assert button exists with text "Upload"
     soup = BeautifulSoup(response.text, features="lxml")
     assert len(soup.find_all("button", type="submit")) == 1
 
 
 # Test post request to "/" can upload an epub
-@pytest.mark.skip(reason="broke when switched to Prisma")
 @pytest.mark.parametrize(
     "book_relative_path, book_id, chapters_amount",
     [
         ("actual_books/frankenstein.epub", 1, 31),
-        ("actual_books/romeo-and-juliet.epub", 2, 28),
-        ("actual_books/the-war-of-the-worlds.epub", 3, 29),
+        ("actual_books/romeo-and-juliet.epub", 1, 28),
+        ("actual_books/the-war-of-the-worlds.epub", 1, 29),
     ],
 )
-def test_index_route_upload_epub(
-    book_relative_path: str, book_id: int, chapters_amount: int, test_client: TestClient, httpx_mock: HTTPXMock
+@pytest.mark.httpx_mock(non_mocked_hosts=["localhost"])
+@pytest.mark.asyncio
+async def test_index_route_upload_epub(
+    book_relative_path: str, book_id: int, chapters_amount: int, test_client_db_reset: TestClient, httpx_mock: HTTPXMock
 ) -> None:  # noqa: F811
-    log_in_with_twitch(test_client, httpx_mock)
+    await twitch_cache.delete_all()
+    log_in_with_twitch(test_client_db_reset, httpx_mock)
 
     # Make sure the book does not exist yet
-    response = test_client.get(f"/audiobook/book/{book_id}")
+    response = test_client_db_reset.get(f"/audiobook/book/{book_id}")
     assert response.status_code == HTTP_401_UNAUTHORIZED
 
     # Upload book
     book_path = Path(__file__).parent / book_relative_path
-    response = test_client.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")})
+    response = test_client_db_reset.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")})
     assert response.status_code == HTTP_201_CREATED
     # Why is the database not empty?
     assert response.headers.get(HTMXHeaders.REDIRECT) == f"/audiobook/book/{book_id}"
     assert response.headers.get("location") is None
 
-    # Clean up responses to avoid assertion failure
-    httpx_mock.reset(assert_all_responses_were_requested=False)
-
     # Make sure N chapters were detected
-    response2 = test_client.get(response.headers.get(HTMXHeaders.REDIRECT))
+    response2 = test_client_db_reset.get(response.headers.get(HTMXHeaders.REDIRECT))
     soup = BeautifulSoup(response2.text, features="lxml")
     matching_divs = soup.find_all("div", id=lambda x: x is not None and x.startswith("chapter_audio_"))
     assert response2.status_code == HTTP_200_OK
     assert len(matching_divs) == chapters_amount
 
 
 # Test post request to "/" book already exists
-@pytest.mark.skip(reason="broke when switched to Prisma")
-def test_index_route_upload_epub_twice(test_client: TestClient, httpx_mock: HTTPXMock) -> None:  # noqa: F811
-    log_in_with_twitch(test_client, httpx_mock)
+@pytest.mark.httpx_mock(non_mocked_hosts=["localhost"])
+@pytest.mark.asyncio
+async def test_index_route_upload_epub_twice(test_client_db_reset: TestClient, httpx_mock: HTTPXMock) -> None:  # noqa: F811
+    await twitch_cache.delete_all()
+    log_in_with_twitch(test_client_db_reset, httpx_mock)
 
     # Make sure the book does not exist yet
-    response = test_client.get("/audiobook/book/1")
-    # Why is this not 401?
-    assert response.status_code == HTTP_200_OK
+    response = test_client_db_reset.get("/audiobook/book/1")
+    assert response.status_code == HTTP_401_UNAUTHORIZED
 
     # Upload book the first time
     book_path = Path(__file__).parent / "actual_books/frankenstein.epub"
-    response2 = test_client.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")})
+    response2 = test_client_db_reset.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")})
     assert response2.status_code == HTTP_201_CREATED
     assert response2.headers.get(HTMXHeaders.REDIRECT) == "/audiobook/book/1"
     assert response2.headers.get("location") is None
 
     # Upload a second time
-    response3 = test_client.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")})
+    response3 = test_client_db_reset.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")})
     assert response2.status_code == HTTP_201_CREATED
     # Make sure it points to the same book
     assert response3.headers.get(HTMXHeaders.REDIRECT) == "/audiobook/book/1"
     assert response3.headers.get("location") is None
 
-    # Clean up responses to avoid assertion failure
-    httpx_mock.reset(assert_all_responses_were_requested=False)
-
 
 # Test "/book/book_id" does not have an uploaded book
 

diff --git a/...endpoints_test/login/test_login_github.py → ...test/endpoints/login/test_login_github.py b/...endpoints_test/login/test_login_github.py → ...test/endpoints/login/test_login_github.py
diff --git a/...endpoints_test/login/test_login_twitch.py → ...test/endpoints/login/test_login_twitch.py b/...endpoints_test/login/test_login_twitch.py → ...test/endpoints/login/test_login_twitch.py
diff --git a/.../test/endpoints_test/login/test_logout.py → ...erver/test/endpoints/login/test_logout.py b/.../test/endpoints_test/login/test_logout.py → ...erver/test/endpoints/login/test_logout.py
diff --git a/...r/test/endpoints_test/test_hello_world.py → ...server/test/endpoints/test_hello_world.py b/...r/test/endpoints_test/test_hello_world.py → ...server/test/endpoints/test_hello_world.py
diff --git a/...ver/test/endpoints_test/test_htmx_todo.py → ...i_server/test/endpoints/test_htmx_todo.py b/...ver/test/endpoints_test/test_htmx_todo.py → ...i_server/test/endpoints/test_htmx_todo.py
diff --git a/fastapi_server/test/endpoints_test/audiobook/test_read_epub.py b/fastapi_server/test/endpoints_test/audiobook/test_read_epub.py