Skip to content

Commit

Permalink
[DISCO-3228] Add metrics and logging for curated recommendations favi…
Browse files Browse the repository at this point in the history
…cons (#804)

* [DISCO-3228] Add metrics and logging for curated recommendations favicons

* fix integration tests

* minor tweak

* clean up

* fix lint

* update test with hit metric assert

* update test with comment
  • Loading branch information
Herraj authored Feb 26, 2025
1 parent be31d52 commit 6c1afb6
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from urllib.parse import urlparse, urlencode, parse_qsl
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
from dataclasses import dataclass
from pydantic import HttpUrl

import aiodogstatsd
from httpx import AsyncClient, HTTPError
Expand Down Expand Up @@ -318,6 +319,9 @@ async def _fetch_from_backend(
# get the utm_source based on scheduled surface id
utm_source = self.get_utm_source(surface_id)

# list that holds urls of corpus items that don't have an icon url in the favicon manifest file
corpus_items_no_icons: list[HttpUrl] = []

for item in data["data"]["scheduledSurface"]["items"]:
# Map Corpus topic to SERP topic
item["corpusItem"]["topic"] = self.map_corpus_topic_to_serp_topic(
Expand All @@ -330,6 +334,14 @@ async def _fetch_from_backend(
# Add icon URL if available
if icon_url := self.manifest_provider.get_icon_url(item["corpusItem"]["url"]):
item["corpusItem"]["iconUrl"] = icon_url
self.metrics_client.increment("corpus_item.manifest_provider.icon_url.hit")
else:
corpus_items_no_icons.append(item["corpusItem"]["url"])
self.metrics_client.increment("corpus_item.manifest_provider.icon_url.miss")

logger.info(
f"Curated Recommendations with missing icon urls: {len(corpus_items_no_icons)}, {corpus_items_no_icons}"
)

curated_recommendations = [
CorpusItem(
Expand Down
3 changes: 2 additions & 1 deletion tests/data/scheduled_surface.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
"topic": "FOOD",
"publisher": "Epicurious",
"isTimeSensitive": false,
"imageUrl": "https://s3.us-east-1.amazonaws.com/pocket-curatedcorpusapi-prod-images/40e30ce2-a298-4b34-ab58-8f0f3910ee39.jpeg"
"imageUrl": "https://s3.us-east-1.amazonaws.com/pocket-curatedcorpusapi-prod-images/40e30ce2-a298-4b34-ab58-8f0f3910ee39.jpeg",
"iconUrl": "https://example.com/icon.png"
}
},
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,29 @@ async def test_fetch(corpus_backend: CorpusApiBackend, fixture_response_data):
results = await corpus_backend.fetch(surface_id)

assert len(results) == 80
assert results[0] == CorpusItem(
url=HttpUrl(
"https://getpocket.com/explore/item/milk-powder-is-the-key-to-better-cookies-"
"brownies-and-cakes?utm_source=firefox-newtab-en-us"
),
title="Milk Powder Is the Key to Better Cookies, Brownies, and Cakes",
excerpt="Consider this pantry staple your secret ingredient for making more flavorful "
"desserts.",
topic=Topic.FOOD,
publisher="Epicurious",
isTimeSensitive=False,
imageUrl=HttpUrl(
"https://s3.us-east-1.amazonaws.com/pocket-curatedcorpusapi-prod-images/"
"40e30ce2-a298-4b34-ab58-8f0f3910ee39.jpeg"
),
scheduledCorpusItemId="de614b6b-6df6-470a-97f2-30344c56c1b3",
corpusItemId="4095b364-02ff-402c-b58a-792a067fccf2",
iconUrl=None,
assert (
results[0]
== CorpusItem(
url=HttpUrl(
"https://getpocket.com/explore/item/milk-powder-is-the-key-to-better-cookies-"
"brownies-and-cakes?utm_source=firefox-newtab-en-us"
),
title="Milk Powder Is the Key to Better Cookies, Brownies, and Cakes",
excerpt="Consider this pantry staple your secret ingredient for making more flavorful "
"desserts.",
topic=Topic.FOOD,
publisher="Epicurious",
isTimeSensitive=False,
imageUrl=HttpUrl(
"https://s3.us-east-1.amazonaws.com/pocket-curatedcorpusapi-prod-images/"
"40e30ce2-a298-4b34-ab58-8f0f3910ee39.jpeg"
),
scheduledCorpusItemId="de614b6b-6df6-470a-97f2-30344c56c1b3",
corpusItemId="4095b364-02ff-402c-b58a-792a067fccf2",
iconUrl=HttpUrl(
"https://example.com/icon.png"
), # in the test data file, schedule_surface.json, only the item with this corpusItemId has the iconUrl property
)
)


Expand Down Expand Up @@ -90,6 +95,7 @@ def mock_post_by_date(*args, **kwargs):
"publisher": "Mozilla",
"isTimeSensitive": True,
"imageUrl": "https://example.com/image.jpg",
"iconUrl": None,
},
},
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,12 @@ async def test_curated_recommendations(repeat):
topic=Topic.FOOD,
publisher="Epicurious",
isTimeSensitive=False,
imageUrl="https://s3.us-east-1.amazonaws.com/pocket-curatedcorpusapi-prod-images/40e30ce2-a298-4b34-ab58-8f0f3910ee39.jpeg",
imageUrl=HttpUrl(
"https://s3.us-east-1.amazonaws.com/pocket-curatedcorpusapi-prod-images/40e30ce2-a298-4b34-ab58-8f0f3910ee39.jpeg"
),
receivedRank=0,
tileId=301455520317019,
iconUrl=HttpUrl("https://example.com/icon.png"),
)
# Mock the endpoint
response = await fetch_en_us(ac)
Expand Down Expand Up @@ -1080,13 +1083,18 @@ async def test_metrics_cache_miss(self, mocker: MockerFixture) -> None:

# TODO: Remove reliance on internal details of aiodogstatsd
metric_keys: list[str] = [call.args[0] for call in report.call_args_list]
assert metric_keys == [
"corpus_api.request.timing",
"corpus_api.request.status_codes.200",
"post.api.v1.curated-recommendations.timing",
"post.api.v1.curated-recommendations.status_codes.200",
"response.status_codes.200",
]
# fetch_en_us loads scheduled_surface.json from a fixture, generating this metric 80 times for its 80 items.
icon_url_miss_metric_keys = ["corpus_item.manifest_provider.icon_url.miss"] * 80

assert metric_keys == (
["corpus_api.request.timing", "corpus_api.request.status_codes.200"]
+ icon_url_miss_metric_keys
+ [
"post.api.v1.curated-recommendations.timing",
"post.api.v1.curated-recommendations.status_codes.200",
"response.status_codes.200",
]
)

@pytest.mark.asyncio
async def test_metrics_cache_hit(self, mocker: MockerFixture) -> None:
Expand Down Expand Up @@ -1139,17 +1147,25 @@ def first_request_returns_error(*args, **kwargs):

# TODO: Remove reliance on internal details of aiodogstatsd
metric_keys: list[str] = [call.args[0] for call in report.call_args_list]
# fetch_en_us loads scheduled_surface.json from a fixture, generating this metric 80 times for its 80 items.
icon_url_miss_metric_keys = ["corpus_item.manifest_provider.icon_url.miss"] * 80

assert (
metric_keys
== [
"corpus_api.request.timing",
"corpus_api.request.status_codes.500",
"corpus_api.request.timing",
"corpus_api.request.status_codes.200",
"post.api.v1.curated-recommendations.timing",
"post.api.v1.curated-recommendations.status_codes.200", # final call should return 200
"response.status_codes.200",
]
== (
[
"corpus_api.request.timing",
"corpus_api.request.status_codes.500",
"corpus_api.request.timing",
"corpus_api.request.status_codes.200",
]
+ icon_url_miss_metric_keys
+ [
"post.api.v1.curated-recommendations.timing",
"post.api.v1.curated-recommendations.status_codes.200", # final call should return 200
"response.status_codes.200",
]
)
)


Expand Down Expand Up @@ -1601,14 +1617,18 @@ async def test_curated_recommendations_enriched_with_icons(
manifest_provider,
corpus_http_client,
fixture_request_data,
mocker: MockerFixture,
):
"""Test the enrichment of a curated recommendation with an added icon-url."""
# mock the statsd client so that we can assert on the increment metric below
report = mocker.patch.object(aiodogstatsd.Client, "_report")

# Set up the manifest data first
manifest_provider.manifest_data.domains = [
Domain(
rank=2,
title="Microsoft – AI, Cloud, Productivity, Computing, Gaming & Apps",
url="https://www.microsoft.com",
url=HttpUrl("https://www.microsoft.com"),
domain="microsoft",
icon="https://merino-images.services.mozilla.com/favicons/microsoft-icon.png",
categories=["Business", "Information Technology"],
Expand Down Expand Up @@ -1663,6 +1683,10 @@ async def test_curated_recommendations_enriched_with_icons(
item["iconUrl"] == "https://merino-images.services.mozilla.com/favicons/microsoft-icon.png"
)

# pull out all the metric keys
metric_keys: list[str] = [call.args[0] for call in report.call_args_list]
assert "corpus_item.manifest_provider.icon_url.hit" in metric_keys

# Clean up
if get_provider in app.dependency_overrides:
del app.dependency_overrides[get_provider]

0 comments on commit 6c1afb6

Please sign in to comment.