diff --git a/fastapi_server/docker-compose.yml b/fastapi_server/docker-compose.yml index 6bee7a11..8965bbfb 100644 --- a/fastapi_server/docker-compose.yml +++ b/fastapi_server/docker-compose.yml @@ -16,7 +16,7 @@ x-services-fragments: [ "sh", "-c", - "poetry run python src/workers/convert_audiobook.py && sleep 10m", + "poetry run python src/workers/convert_audiobook.py && sleep 1m", ] env_file: - .env @@ -169,6 +169,7 @@ services: local_dev_postgres_test: # This container is only used for running tests + container_name: fastapi_dev_postgres_test hostname: fastapi_dev_postgres_test image: postgres:16-alpine restart: unless-stopped diff --git a/fastapi_server/src/routes/audiobook/temp_read_epub.py b/fastapi_server/src/routes/audiobook/temp_read_epub.py index 20a8c8ae..e662903b 100644 --- a/fastapi_server/src/routes/audiobook/temp_read_epub.py +++ b/fastapi_server/src/routes/audiobook/temp_read_epub.py @@ -20,7 +20,7 @@ def extract_sentences(text: str) -> list[str]: def combine_text(text_as_list: list[str]) -> str: combined_text = " ".join(row for row in text_as_list) combined_text = re.sub(r"\s+", " ", combined_text) - return combined_text + return combined_text.strip() class EpubChapter(BaseModel): diff --git a/fastapi_server/test/base_test.py b/fastapi_server/test/base_test.py index 81612b07..c3ae73e4 100644 --- a/fastapi_server/test/base_test.py +++ b/fastapi_server/test/base_test.py @@ -5,16 +5,25 @@ from litestar.testing import TestClient from pytest_httpx import HTTPXMock +from prisma.cli import prisma from src.app import app from src.routes.login_logout import COOKIES +# TODO Use one fixture that resets db and another that doesnt @pytest.fixture(scope="function") def test_client() -> Iterator[TestClient[Litestar]]: with TestClient(app=app, raise_server_exceptions=True) as client: yield client +@pytest.fixture(scope="function") +def test_client_db_reset() -> Iterator[TestClient[Litestar]]: + prisma.run(["db", "push", "--force-reset"], check=True) + with TestClient(app=app, raise_server_exceptions=True) as client: + yield client + + def log_in_with_twitch(test_client: TestClient, httpx_mock: HTTPXMock) -> None: test_client.cookies[COOKIES["twitch"]] = "valid_access_token" httpx_mock.add_response( diff --git a/fastapi_server/test/endpoints_test/audiobook/actual_books/frankenstein.epub b/fastapi_server/test/endpoints/audiobook/actual_books/frankenstein.epub similarity index 100% rename from fastapi_server/test/endpoints_test/audiobook/actual_books/frankenstein.epub rename to fastapi_server/test/endpoints/audiobook/actual_books/frankenstein.epub diff --git a/fastapi_server/test/endpoints_test/audiobook/actual_books/romeo-and-juliet.epub b/fastapi_server/test/endpoints/audiobook/actual_books/romeo-and-juliet.epub similarity index 100% rename from fastapi_server/test/endpoints_test/audiobook/actual_books/romeo-and-juliet.epub rename to fastapi_server/test/endpoints/audiobook/actual_books/romeo-and-juliet.epub diff --git a/fastapi_server/test/endpoints_test/audiobook/actual_books/the-war-of-the-worlds.epub b/fastapi_server/test/endpoints/audiobook/actual_books/the-war-of-the-worlds.epub similarity index 100% rename from fastapi_server/test/endpoints_test/audiobook/actual_books/the-war-of-the-worlds.epub rename to fastapi_server/test/endpoints/audiobook/actual_books/the-war-of-the-worlds.epub diff --git a/fastapi_server/test/endpoints/audiobook/test_read_epub.py b/fastapi_server/test/endpoints/audiobook/test_read_epub.py new file mode 100644 index 00000000..fa649cc8 --- /dev/null +++ b/fastapi_server/test/endpoints/audiobook/test_read_epub.py @@ -0,0 +1,142 @@ +import io +from pathlib import Path + +from ebooklib import epub +from hypothesis import given +from hypothesis import strategies as st +import pytest + +from src.routes.audiobook.temp_read_epub import combine_text, extract_chapters, extract_metadata + + +def generate_epub_helper(book_title: str, book_author: str, chapters: dict[str, str]) -> io.BytesIO: + book = epub.EpubBook() + + # set metadata + book.set_identifier("id123456") + book.set_title(book_title) + book.set_language("en") + + book.add_author(book_author) + # Why is this needed? + book.add_author( + "Danko Bananko", + file_as="Gospodin Danko Bananko", + role="ill", + uid="coauthor", + ) + + # create chapter + created_chapters = {} + for chapter_id, (chapter_title, chapter_content) in enumerate(chapters.items(), start=1): + c1 = epub.EpubHtml( + file_name=f"chap_{chapter_id:04d}.xhtml", + title=chapter_title, + content=chapter_content, + lang="en", + ) + + # add chapter + book.add_item(c1) + created_chapters[chapter_title] = c1 + + # See https://github.com/aerkalov/ebooklib/ + # define Table Of Contents + book.toc = ( + # Why are these extra chapter needed? + epub.Link("intro.xhtml", "Introduction", "intro"), + (epub.Section("Languages"), tuple(created_chapters.values())), + ) + + # add default NCX and Nav file + book.add_item(epub.EpubNcx()) + book.add_item(epub.EpubNav()) + + book_in_memory = io.BytesIO() + epub.write_epub(book_in_memory, book, {}) + return book_in_memory + + +# https://stackoverflow.com/a/57754227/10882657 +@given( + book_title=st.from_regex(r"\w[\w\d\u0370-\u03FF\u0400-\u04FF_ -]*", fullmatch=True), + book_author=st.from_regex(r"\w[\w\d\u0370-\u03FF\u0400-\u04FF_ -]*", fullmatch=True), +) +def test_epub_reader_extract_metadata( + book_title: str, + book_author: str, +): + epub_book = generate_epub_helper( + book_title=book_title, + book_author=book_author, + chapters={}, + ) + book_metadata = extract_metadata(epub_book) + assert book_metadata.title == book_title + assert book_metadata.author == book_author + + +def test_epub_reader_extract_chapters_simple(): + epub_book = generate_epub_helper( + book_title="test title", + book_author="test author", + chapters={"asd": "asd", "asd2": "asd2"}, + ) + book_chapters = extract_chapters(epub_book) + assert len(book_chapters) == 2 + + +@given( + chapters=st.dictionaries( + # Chapter title + keys=st.from_regex(r"\w[\w \n]*", fullmatch=True), + # Chapter content + values=st.from_regex(r"\w[\w \n]*", fullmatch=True), + # Alternative parsing if only 1 chapter was detected + min_size=2, + max_size=10**4 - 1, + ), +) +def test_epub_reader_extract_chapters( + chapters: dict[str, str], +): + # 2 chapters following each other need to have different text + chapters = { + chapter_title: chapter_content + for (chapter_title, chapter_content), (_chapter_title2, chapter_content2) in zip( + chapters.items(), list(chapters.items())[1:] + ) + if chapter_content != chapter_content2 + } + if len(chapters) < 2: + return + + epub_book = generate_epub_helper( + book_title="test title", + book_author="test author", + chapters=chapters, + ) + + # Sanity check: Chapter count needs to be the same + book_chapters = extract_chapters(epub_book) + assert len(chapters) == len(book_chapters) + + # Check that chapters are identical + for real_chapter, (expected_chapter_title, expected_chapter_content) in zip(book_chapters, chapters.items()): + assert real_chapter.chapter_title == expected_chapter_title + assert real_chapter.combined_text == combine_text([expected_chapter_content]) + + +@pytest.mark.parametrize( + "book_relative_path, chapters_amount", + [ + ("actual_books/frankenstein.epub", 31), + ("actual_books/romeo-and-juliet.epub", 28), + ("actual_books/the-war-of-the-worlds.epub", 29), + ], +) +def test_parsing_real_epubs(book_relative_path: str, chapters_amount: int) -> None: # noqa: F811 + book_path = Path(__file__).parent / book_relative_path + book_bytes_io = io.BytesIO(book_path.read_bytes()) + chapters_extracted = extract_chapters(book_bytes_io) + assert len(chapters_extracted) == chapters_amount diff --git a/fastapi_server/test/endpoints_test/audiobook/test_route_upload_epub.py b/fastapi_server/test/endpoints/audiobook/test_route_upload_epub.py similarity index 60% rename from fastapi_server/test/endpoints_test/audiobook/test_route_upload_epub.py rename to fastapi_server/test/endpoints/audiobook/test_route_upload_epub.py index 95332ee5..58414d2e 100644 --- a/fastapi_server/test/endpoints_test/audiobook/test_route_upload_epub.py +++ b/fastapi_server/test/endpoints/audiobook/test_route_upload_epub.py @@ -1,5 +1,5 @@ from pathlib import Path -from test.base_test import log_in_with_twitch, test_client # noqa: F401 +from test.base_test import log_in_with_twitch, test_client, test_client_db_reset # noqa: F401 import pytest from bs4 import BeautifulSoup # pyre-fixme[21] @@ -8,14 +8,10 @@ from litestar.testing import TestClient from pytest_httpx import HTTPXMock -_test_client = test_client - +from src.routes.cookies_and_guards import twitch_cache -def setup_function(function): - # prisma.run(["db", "push", "--force-reset"], check=True) - pass - # TODO Can't access db directly from test functions, but the server seems to handle it correctly - # Perhaps adding a test-endpoint to verify that data is in the database? +_test_client = test_client +_test_client_db_reset = test_client_db_reset def test_index_route_inaccessable_when_not_logged_in(test_client: TestClient) -> None: # noqa: F811 @@ -24,9 +20,9 @@ def test_index_route_inaccessable_when_not_logged_in(test_client: TestClient) -> # Test "/" has upload button -def test_index_route_has_upload_button(test_client: TestClient, httpx_mock: HTTPXMock) -> None: # noqa: F811 - log_in_with_twitch(test_client, httpx_mock) - response = test_client.get("/audiobook/epub_upload") +def test_index_route_has_upload_button(test_client_db_reset: TestClient, httpx_mock: HTTPXMock) -> None: # noqa: F811 + log_in_with_twitch(test_client_db_reset, httpx_mock) + response = test_client_db_reset.get("/audiobook/epub_upload") assert response.status_code == HTTP_200_OK # assert button exists with text "Upload" soup = BeautifulSoup(response.text, features="lxml") @@ -34,37 +30,36 @@ def test_index_route_has_upload_button(test_client: TestClient, httpx_mock: HTTP # Test post request to "/" can upload an epub -@pytest.mark.skip(reason="broke when switched to Prisma") @pytest.mark.parametrize( "book_relative_path, book_id, chapters_amount", [ ("actual_books/frankenstein.epub", 1, 31), - ("actual_books/romeo-and-juliet.epub", 2, 28), - ("actual_books/the-war-of-the-worlds.epub", 3, 29), + ("actual_books/romeo-and-juliet.epub", 1, 28), + ("actual_books/the-war-of-the-worlds.epub", 1, 29), ], ) -def test_index_route_upload_epub( - book_relative_path: str, book_id: int, chapters_amount: int, test_client: TestClient, httpx_mock: HTTPXMock +@pytest.mark.httpx_mock(non_mocked_hosts=["localhost"]) +@pytest.mark.asyncio +async def test_index_route_upload_epub( + book_relative_path: str, book_id: int, chapters_amount: int, test_client_db_reset: TestClient, httpx_mock: HTTPXMock ) -> None: # noqa: F811 - log_in_with_twitch(test_client, httpx_mock) + await twitch_cache.delete_all() + log_in_with_twitch(test_client_db_reset, httpx_mock) # Make sure the book does not exist yet - response = test_client.get(f"/audiobook/book/{book_id}") + response = test_client_db_reset.get(f"/audiobook/book/{book_id}") assert response.status_code == HTTP_401_UNAUTHORIZED # Upload book book_path = Path(__file__).parent / book_relative_path - response = test_client.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")}) + response = test_client_db_reset.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")}) assert response.status_code == HTTP_201_CREATED # Why is the database not empty? assert response.headers.get(HTMXHeaders.REDIRECT) == f"/audiobook/book/{book_id}" assert response.headers.get("location") is None - # Clean up responses to avoid assertion failure - httpx_mock.reset(assert_all_responses_were_requested=False) - # Make sure N chapters were detected - response2 = test_client.get(response.headers.get(HTMXHeaders.REDIRECT)) + response2 = test_client_db_reset.get(response.headers.get(HTMXHeaders.REDIRECT)) soup = BeautifulSoup(response2.text, features="lxml") matching_divs = soup.find_all("div", id=lambda x: x is not None and x.startswith("chapter_audio_")) assert response2.status_code == HTTP_200_OK @@ -72,32 +67,30 @@ def test_index_route_upload_epub( # Test post request to "/" book already exists -@pytest.mark.skip(reason="broke when switched to Prisma") -def test_index_route_upload_epub_twice(test_client: TestClient, httpx_mock: HTTPXMock) -> None: # noqa: F811 - log_in_with_twitch(test_client, httpx_mock) +@pytest.mark.httpx_mock(non_mocked_hosts=["localhost"]) +@pytest.mark.asyncio +async def test_index_route_upload_epub_twice(test_client_db_reset: TestClient, httpx_mock: HTTPXMock) -> None: # noqa: F811 + await twitch_cache.delete_all() + log_in_with_twitch(test_client_db_reset, httpx_mock) # Make sure the book does not exist yet - response = test_client.get("/audiobook/book/1") - # Why is this not 401? - assert response.status_code == HTTP_200_OK + response = test_client_db_reset.get("/audiobook/book/1") + assert response.status_code == HTTP_401_UNAUTHORIZED # Upload book the first time book_path = Path(__file__).parent / "actual_books/frankenstein.epub" - response2 = test_client.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")}) + response2 = test_client_db_reset.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")}) assert response2.status_code == HTTP_201_CREATED assert response2.headers.get(HTMXHeaders.REDIRECT) == "/audiobook/book/1" assert response2.headers.get("location") is None # Upload a second time - response3 = test_client.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")}) + response3 = test_client_db_reset.post("/audiobook/epub_upload", files={"upload-file": book_path.open("rb")}) assert response2.status_code == HTTP_201_CREATED # Make sure it points to the same book assert response3.headers.get(HTMXHeaders.REDIRECT) == "/audiobook/book/1" assert response3.headers.get("location") is None - # Clean up responses to avoid assertion failure - httpx_mock.reset(assert_all_responses_were_requested=False) - # Test "/book/book_id" does not have an uploaded book diff --git a/fastapi_server/test/endpoints_test/login/test_login_github.py b/fastapi_server/test/endpoints/login/test_login_github.py similarity index 100% rename from fastapi_server/test/endpoints_test/login/test_login_github.py rename to fastapi_server/test/endpoints/login/test_login_github.py diff --git a/fastapi_server/test/endpoints_test/login/test_login_twitch.py b/fastapi_server/test/endpoints/login/test_login_twitch.py similarity index 100% rename from fastapi_server/test/endpoints_test/login/test_login_twitch.py rename to fastapi_server/test/endpoints/login/test_login_twitch.py diff --git a/fastapi_server/test/endpoints_test/login/test_logout.py b/fastapi_server/test/endpoints/login/test_logout.py similarity index 100% rename from fastapi_server/test/endpoints_test/login/test_logout.py rename to fastapi_server/test/endpoints/login/test_logout.py diff --git a/fastapi_server/test/endpoints_test/test_hello_world.py b/fastapi_server/test/endpoints/test_hello_world.py similarity index 100% rename from fastapi_server/test/endpoints_test/test_hello_world.py rename to fastapi_server/test/endpoints/test_hello_world.py diff --git a/fastapi_server/test/endpoints_test/test_htmx_todo.py b/fastapi_server/test/endpoints/test_htmx_todo.py similarity index 100% rename from fastapi_server/test/endpoints_test/test_htmx_todo.py rename to fastapi_server/test/endpoints/test_htmx_todo.py diff --git a/fastapi_server/test/endpoints_test/audiobook/test_read_epub.py b/fastapi_server/test/endpoints_test/audiobook/test_read_epub.py deleted file mode 100644 index 49b3134b..00000000 --- a/fastapi_server/test/endpoints_test/audiobook/test_read_epub.py +++ /dev/null @@ -1 +0,0 @@ -# TODO Test to load epub books with various formats and metadata missing