test: Add profiling to follow performance in CI

clemlesne · Feb 10, 2025 · 0b8145e · 0b8145e
1 parent 447e0c7
commit 0b8145e
Show file tree

Hide file tree

Showing 12 changed files with 261 additions and 45 deletions.
diff --git a/.github/workflows/pipeline.yaml b/.github/workflows/pipeline.yaml
@@ -52,7 +52,8 @@ jobs:
         # Run all test suites
         step:
           - static
-          - unit
+          - unit-simple
+          - unit-profiling
         # Run on all supported Python versions
         python-version:
           - "3.11"
@@ -103,9 +104,17 @@ jobs:
       - name: Configure environment variables
         run: echo "${{ secrets.DOTENV_UNIT_TESTS }}" > .env
 
+      - name: Run test servers (unit)
+        if: ${{ contains(matrix.step, 'unit') }}
+        run: make run-test-servers
+
       - name: Run tests
         run: make test-${{ matrix.step }} version_full=${{ needs.init.outputs.VERSION_FULL }}
 
+      - name: Kill test servers (unit)
+        if: ${{ contains(matrix.step, 'unit') }}
+        run: make kill-test-servers
+
       - name: Upload artifacts
         uses: actions/[email protected]
         if: always()

diff --git a/.gitignore b/.gitignore
@@ -291,3 +291,6 @@ test-reports/
 # Local .env
 !.env.example
 .env.*
+
+# Test servers PIDs
+*.pid
diff --git a/Makefile b/Makefile
@@ -53,7 +53,9 @@ upgrade:
 
 test:
 	$(MAKE) test-static
-	$(MAKE) test-unit
+	$(MAKE) run-test-servers
+	$(MAKE) test-unit-simple
+	$(MAKE) kill-test-servers
 
 test-static:
 	@echo "➡️ Test dependencies issues (deptry)..."
@@ -65,22 +67,34 @@ test-static:
 	@echo "➡️ Test types (Pyright)..."
 	uv run pyright
 
-test-unit:
-	bash cicd/test-unit-ci.sh
-
-test-static-server:
+# Start the local static file server (serving ./tests/websites on port 8000)
+# in the background and write the PID of that background job to
+# .static_server.pid, which `kill-test-servers` reads to stop it.
+# NOTE(review): "$$!" is the PID of the backgrounded `uv run` command; confirm
+# that signalling it also stops the underlying http.server process.
+run-test-servers:
 	@echo "➡️ Starting local static server..."
-	python3 -m http.server -d ./tests/websites 8000
+	uv run -m http.server -d ./tests/websites 8000 & echo "$$!" > .static_server.pid
+
+# Stop the static server whose PID was recorded in .static_server.pid.
+# The PID is read by the shell when the recipe line runs (not by make via
+# $(shell ...)), and the PID file is removed before signalling so a stale
+# file never survives, even if the process has already exited.
+# `KILL` is the POSIX signal name (no "SIG" prefix), accepted by every sh.
+kill-test-servers:
+	@echo "➡️ Killing local static server..."
+	pid="$$(cat .static_server.pid)" && \
+		rm -f .static_server.pid && \
+		kill -s KILL "$$pid"
 
-test-unit-run:
-	@echo "➡️ Unit tests (Pytest)..."
+# Run the unit tests in tests/*.py with pytest, writing the JUnit XML report
+# and the log file to test-reports/, both named after $(version_full) with a
+# "-simple" suffix.
+# NOTE(review): `-n=logical` and `--maxprocesses=4` look like pytest-xdist
+# options (parallel workers, capped at 4) — confirm against the plugin docs.
+test-unit-simple:
+	@echo "➡️ Unit tests with no extra (Pytest)..."
 	uv run pytest \
-		--junit-xml=test-reports/$(version_full).xml \
-		--log-file=test-reports/$(version_full).log \
+		--junit-xml=test-reports/$(version_full)-simple.xml \
+		--log-file=test-reports/$(version_full)-simple.log \
 		--maxprocesses=4 \
 		-n=logical \
 		tests/*.py
 
+# Run pytest under the Scalene profiler, restricted by `-k "_profiling"` to
+# tests whose names match "_profiling". The profile is written as JSON to
+# test-reports/$(version_full)-profiling.json, next to the JUnit XML report
+# and log file, which carry the same "-profiling" suffix.
+# NOTE(review): everything after `---` is presumably handed to the profiled
+# program (here `-m pytest ...`) — confirm against the Scalene CLI docs.
+test-unit-profiling:
+	@echo "➡️ Unit tests with profiling (Pytest)..."
+	uv run scalene \
+		--json \
+		--outfile test-reports/$(version_full)-profiling.json \
+		--- -m pytest \
+			--junit-xml=test-reports/$(version_full)-profiling.xml \
+			--log-file=test-reports/$(version_full)-profiling.log \
+			-k "_profiling" \
+			tests/*.py
+
 dev:
 	uv pip install --editable .
 	@echo "Now you can run 'scrape-it-now' CLI!"

diff --git a/cicd/test-unit-ci.sh b/cicd/test-unit-ci.sh
diff --git a/pyproject.toml b/pyproject.toml
@@ -76,6 +76,7 @@ dev = [
   "pytest-xdist[psutil]~=3.6", # Pytest plugin for parallel testing
   "pytest~=8.3",               # Testing framework
   "ruff~=0.6",                 # Linter
+  "scalene~=1.5",              # Profiler
 ]
 
 [project.urls]

diff --git a/src/scrape_it_now/helpers/identity.py b/src/scrape_it_now/helpers/identity.py
@@ -2,13 +2,15 @@
 
 from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider
 
+from scrape_it_now.helpers import IS_CI
 from scrape_it_now.helpers.cache import lru_acache
 from scrape_it_now.helpers.http import azure_transport
 
 
 @lru_acache()
 async def credential() -> DefaultAzureCredential:
+    """
+    Build the `DefaultAzureCredential` used to authenticate against Azure.
+
+    The credential uses the transport returned by `azure_transport()` and a
+    process timeout that is longer when `IS_CI` is set.
+
+    NOTE(review): `lru_acache` presumably memoises the result so the same
+    credential is reused across calls — confirm in `helpers.cache`.
+    """
     return DefaultAzureCredential(
+        process_timeout=30 if IS_CI else 10,  # 30 secs in CI, 10 secs in production
         # Performance
         transport=await azure_transport(),
     )

diff --git a/src/scrape_it_now/scrape.py b/src/scrape_it_now/scrape.py
@@ -74,9 +74,7 @@
 _ads_pattern_cache: re.Pattern | None = None
 
 # Browser
-BROWSER_TIMEOUT_MS = (
-    5 * 60 * 1000 if IS_CI else 3 * 60 * 1000
-)  # 5 mins in CI, 3 secs in production
+BROWSER_TIMEOUT_MS = 3 * 60 * 1000  # 3 mins
 
 
 async def _queue(  # noqa: PLR0913

diff --git a/tests/blob.py b/tests/blob.py
@@ -27,7 +27,7 @@
     ids=lambda x: x.value,
 )
 @pytest.mark.repeat(10)  # Catch multi-threading and concurrency issues
-async def test_acid(provider: BlobProvider) -> None:
+async def test_acid_profiling(provider: BlobProvider) -> None:
     # Init values
     blob_content = _random_content()
     blob_name = _random_name()

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -12,9 +12,8 @@ async def browser() -> AsyncGenerator[Browser, None]:
     Fixture to provide a Playwright browser for each test.
     """
     # Make sure the browser and pandoc are installed
-    async with async_playwright() as p:
-        # Note: This won't install required system packages, make sure to install them manually
-        await install(False)
+    # Note: This won't install required system packages, make sure to install them manually
+    await install(False)
 
     # Restart context to reload PATH to the newly installed binaries
     async with async_playwright() as p:

diff --git a/tests/queue.py b/tests/queue.py
@@ -22,7 +22,7 @@
     ids=lambda x: x.value,
 )
 @pytest.mark.repeat(10)  # Catch multi-threading and concurrency issues
-async def test_acid(provider: QueueProvider) -> None:
+async def test_acid_profiling(provider: QueueProvider) -> None:
     # Init values
     queue_name = _random_name()
     contents = [

diff --git a/tests/scrape.py b/tests/scrape.py
@@ -41,7 +41,7 @@
     ],
     ids=lambda x: x,
 )
-async def test_scrape_page_website(
+async def test_scrape_page_website_profiling(
     website: str,
     browser: Browser,
 ) -> None:
@@ -293,9 +293,9 @@ async def test_scrape_page_timeout(browser: Browser) -> None:
     # Check timeout duration
     assert took_time > timedelta(
         seconds=(BROWSER_TIMEOUT_MS / 1000) - 1
-    ) and took_time < timedelta(seconds=(BROWSER_TIMEOUT_MS / 1000) + 5), (
-        f"Timeout should be around {BROWSER_TIMEOUT_MS / 1000} secs"
-    )
+    ) and took_time < timedelta(
+        seconds=(BROWSER_TIMEOUT_MS / 1000) + 5
+    ), f"Timeout should be around {BROWSER_TIMEOUT_MS / 1000} secs"
 
     # Check page is not None
     assert page is not None, "Page should not be None"