diff --git a/merino/curated_recommendations/corpus_backends/corpus_api_backend.py b/merino/curated_recommendations/corpus_backends/corpus_api_backend.py index 6e36ada0..48ad74bb 100644 --- a/merino/curated_recommendations/corpus_backends/corpus_api_backend.py +++ b/merino/curated_recommendations/corpus_backends/corpus_api_backend.py @@ -7,6 +7,7 @@ from urllib.parse import urlparse, urlencode, parse_qsl from zoneinfo import ZoneInfo, ZoneInfoNotFoundError from dataclasses import dataclass +from pydantic import HttpUrl import aiodogstatsd from httpx import AsyncClient, HTTPError @@ -318,6 +319,9 @@ async def _fetch_from_backend( # get the utm_source based on scheduled surface id utm_source = self.get_utm_source(surface_id) + # list that holds urls of corpus items that don't have an icon url in the favicon manifest file + corpus_items_no_icons: list[HttpUrl] = [] + for item in data["data"]["scheduledSurface"]["items"]: # Map Corpus topic to SERP topic item["corpusItem"]["topic"] = self.map_corpus_topic_to_serp_topic( @@ -330,6 +334,14 @@ async def _fetch_from_backend( # Add icon URL if available if icon_url := self.manifest_provider.get_icon_url(item["corpusItem"]["url"]): item["corpusItem"]["iconUrl"] = icon_url + self.metrics_client.increment("corpus_item.manifest_provider.icon_url.hit") + else: + corpus_items_no_icons.append(item["corpusItem"]["url"]) + self.metrics_client.increment("corpus_item.manifest_provider.icon_url.miss") + + logger.info( + f"Curated Recommendations with missing icon urls: {len(corpus_items_no_icons)}, {corpus_items_no_icons}" + ) curated_recommendations = [ CorpusItem( diff --git a/tests/data/scheduled_surface.json b/tests/data/scheduled_surface.json index 6f8c4f03..a56b4b83 100644 --- a/tests/data/scheduled_surface.json +++ b/tests/data/scheduled_surface.json @@ -12,7 +12,8 @@ "topic": "FOOD", "publisher": "Epicurious", "isTimeSensitive": false, - "imageUrl": "https://s3.us-east-1.amazonaws.com/pocket-curatedcorpusapi-prod-images/40e30ce2-a298-4b34-ab58-8f0f3910ee39.jpeg" + "imageUrl": "https://s3.us-east-1.amazonaws.com/pocket-curatedcorpusapi-prod-images/40e30ce2-a298-4b34-ab58-8f0f3910ee39.jpeg", + "iconUrl": "https://example.com/icon.png" } }, { diff --git a/tests/integration/api/v1/curated_recommendations/corpus_backends/test_corpus_api_backend.py b/tests/integration/api/v1/curated_recommendations/corpus_backends/test_corpus_api_backend.py index fee818cd..78e89653 100644 --- a/tests/integration/api/v1/curated_recommendations/corpus_backends/test_corpus_api_backend.py +++ b/tests/integration/api/v1/curated_recommendations/corpus_backends/test_corpus_api_backend.py @@ -25,24 +25,29 @@ async def test_fetch(corpus_backend: CorpusApiBackend, fixture_response_data): results = await corpus_backend.fetch(surface_id) assert len(results) == 80 - assert results[0] == CorpusItem( - url=HttpUrl( - "https://getpocket.com/explore/item/milk-powder-is-the-key-to-better-cookies-" - "brownies-and-cakes?utm_source=firefox-newtab-en-us" - ), - title="Milk Powder Is the Key to Better Cookies, Brownies, and Cakes", - excerpt="Consider this pantry staple your secret ingredient for making more flavorful " - "desserts.", - topic=Topic.FOOD, - publisher="Epicurious", - isTimeSensitive=False, - imageUrl=HttpUrl( - "https://s3.us-east-1.amazonaws.com/pocket-curatedcorpusapi-prod-images/" - "40e30ce2-a298-4b34-ab58-8f0f3910ee39.jpeg" - ), - scheduledCorpusItemId="de614b6b-6df6-470a-97f2-30344c56c1b3", - corpusItemId="4095b364-02ff-402c-b58a-792a067fccf2", - iconUrl=None, + assert ( + results[0] + == CorpusItem( + url=HttpUrl( + "https://getpocket.com/explore/item/milk-powder-is-the-key-to-better-cookies-" + "brownies-and-cakes?utm_source=firefox-newtab-en-us" + ), + title="Milk Powder Is the Key to Better Cookies, Brownies, and Cakes", + excerpt="Consider this pantry staple your secret ingredient for making more flavorful " + "desserts.", + topic=Topic.FOOD, + publisher="Epicurious", + isTimeSensitive=False, + imageUrl=HttpUrl( + "https://s3.us-east-1.amazonaws.com/pocket-curatedcorpusapi-prod-images/" + "40e30ce2-a298-4b34-ab58-8f0f3910ee39.jpeg" + ), + scheduledCorpusItemId="de614b6b-6df6-470a-97f2-30344c56c1b3", + corpusItemId="4095b364-02ff-402c-b58a-792a067fccf2", + iconUrl=HttpUrl( + "https://example.com/icon.png" + ), # in the test data file, schedule_surface.json, only the item with this corpusItemId has the iconUrl property + ) ) @@ -90,6 +95,7 @@ def mock_post_by_date(*args, **kwargs): "publisher": "Mozilla", "isTimeSensitive": True, "imageUrl": "https://example.com/image.jpg", + "iconUrl": None, }, }, ] diff --git a/tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py b/tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py index dadbb91e..cd62a508 100644 --- a/tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py +++ b/tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py @@ -208,9 +208,12 @@ async def test_curated_recommendations(repeat): topic=Topic.FOOD, publisher="Epicurious", isTimeSensitive=False, - imageUrl="https://s3.us-east-1.amazonaws.com/pocket-curatedcorpusapi-prod-images/40e30ce2-a298-4b34-ab58-8f0f3910ee39.jpeg", + imageUrl=HttpUrl( + "https://s3.us-east-1.amazonaws.com/pocket-curatedcorpusapi-prod-images/40e30ce2-a298-4b34-ab58-8f0f3910ee39.jpeg" + ), receivedRank=0, tileId=301455520317019, + iconUrl=HttpUrl("https://example.com/icon.png"), ) # Mock the endpoint response = await fetch_en_us(ac) @@ -1080,13 +1083,18 @@ async def test_metrics_cache_miss(self, mocker: MockerFixture) -> None: # TODO: Remove reliance on internal details of aiodogstatsd metric_keys: list[str] = [call.args[0] for call in report.call_args_list] - assert metric_keys == [ - "corpus_api.request.timing", - "corpus_api.request.status_codes.200", - "post.api.v1.curated-recommendations.timing", - "post.api.v1.curated-recommendations.status_codes.200", - "response.status_codes.200", - ] + # fetch_en_us loads scheduled_surface.json from a fixture, generating this metric 80 times for its 80 items. + icon_url_miss_metric_keys = ["corpus_item.manifest_provider.icon_url.miss"] * 80 + + assert metric_keys == ( + ["corpus_api.request.timing", "corpus_api.request.status_codes.200"] + + icon_url_miss_metric_keys + + [ + "post.api.v1.curated-recommendations.timing", + "post.api.v1.curated-recommendations.status_codes.200", + "response.status_codes.200", + ] + ) @pytest.mark.asyncio async def test_metrics_cache_hit(self, mocker: MockerFixture) -> None: @@ -1139,17 +1147,25 @@ def first_request_returns_error(*args, **kwargs): # TODO: Remove reliance on internal details of aiodogstatsd metric_keys: list[str] = [call.args[0] for call in report.call_args_list] + # fetch_en_us loads scheduled_surface.json from a fixture, generating this metric 80 times for its 80 items. + icon_url_miss_metric_keys = ["corpus_item.manifest_provider.icon_url.miss"] * 80 + assert ( metric_keys - == [ - "corpus_api.request.timing", - "corpus_api.request.status_codes.500", - "corpus_api.request.timing", - "corpus_api.request.status_codes.200", - "post.api.v1.curated-recommendations.timing", - "post.api.v1.curated-recommendations.status_codes.200", # final call should return 200 - "response.status_codes.200", - ] + == ( + [ + "corpus_api.request.timing", + "corpus_api.request.status_codes.500", + "corpus_api.request.timing", + "corpus_api.request.status_codes.200", + ] + + icon_url_miss_metric_keys + + [ + "post.api.v1.curated-recommendations.timing", + "post.api.v1.curated-recommendations.status_codes.200", # final call should return 200 + "response.status_codes.200", + ] + ) ) @@ -1601,14 +1617,18 @@ async def test_curated_recommendations_enriched_with_icons( manifest_provider, corpus_http_client, fixture_request_data, + mocker: MockerFixture, ): """Test the enrichment of a curated recommendation with an added icon-url.""" + # mock the statsd client so that we can assert on the increment metric below + report = mocker.patch.object(aiodogstatsd.Client, "_report") + # Set up the manifest data first manifest_provider.manifest_data.domains = [ Domain( rank=2, title="Microsoft – AI, Cloud, Productivity, Computing, Gaming & Apps", - url="https://www.microsoft.com", + url=HttpUrl("https://www.microsoft.com"), domain="microsoft", icon="https://merino-images.services.mozilla.com/favicons/microsoft-icon.png", categories=["Business", "Information Technology"], @@ -1663,6 +1683,10 @@ async def test_curated_recommendations_enriched_with_icons( item["iconUrl"] == "https://merino-images.services.mozilla.com/favicons/microsoft-icon.png" ) + # pull out all the metric keys + metric_keys: list[str] = [call.args[0] for call in report.call_args_list] + assert "corpus_item.manifest_provider.icon_url.hit" in metric_keys + # Clean up if get_provider in app.dependency_overrides: del app.dependency_overrides[get_provider]