From 404b15ff944d621eefe585207f96204e0d8b0fac Mon Sep 17 00:00:00 2001 From: Eiko Wagenknecht Date: Sun, 3 Dec 2023 13:36:21 +0100 Subject: [PATCH] fix: recognize changed ubisoft offer format (#293) --- src/lootscraper/scraper/ubisoft_games.py | 36 ++++++++++++++++++------ 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/src/lootscraper/scraper/ubisoft_games.py b/src/lootscraper/scraper/ubisoft_games.py index 36327786..7f5f1ec1 100644 --- a/src/lootscraper/scraper/ubisoft_games.py +++ b/src/lootscraper/scraper/ubisoft_games.py @@ -1,5 +1,6 @@ from __future__ import annotations +import contextlib import logging from dataclasses import dataclass from datetime import datetime, timezone @@ -67,20 +68,24 @@ async def read_raw_offer( # Scroll element into view to load img url await element.scroll_into_view_if_needed() + # Format (values in <> are an example): + # Get <Game Title> for free! title = await element.locator(".c-focus-banner__title").text_content() if title is None: raise ValueError("Couldn't find title.") - if "free" not in title.lower(): + # Filter out various other promotions + if not title.startswith("Get ") or not title.endswith(" for FREE!"): return None - # Format: January 23, 2023 at 3PM UTC + # Format (values in <> are an example): + # Offer ends <January 23, 2023 at 3PM UTC>. 
valid_to = await element.locator(".c-focus-banner__legal-line").inner_text() if valid_to is None: raise ValueError(f"Couldn't find valid to for {title}.") - url = await element.locator("a").get_attribute("href") + url = await element.locator("a.button").get_attribute("href") if url is None: raise ValueError(f"Couldn't find url for {title}.") if not url.startswith("http"): @@ -112,15 +117,30 @@ def normalize_offer( } title = raw_offer.title.removeprefix("Get ").removesuffix(" for FREE!") - valid_to = raw_offer.valid_to.removeprefix("Offer valid until ").removesuffix( - " UTC.", + valid_to = ( + raw_offer.valid_to.removeprefix("Offer valid until ") + .removeprefix("Offer ends ") + .removesuffix( + " UTC.", + ) ) utc_valid_to = None if valid_to: - utc_valid_to = datetime.strptime(valid_to, "%B %d, %Y at %I%p").replace( - tzinfo=timezone.utc, - ) + with contextlib.suppress(ValueError): + utc_valid_to = datetime.strptime(valid_to, "%B %d, %Y at %I%p").replace( + tzinfo=timezone.utc, + ) + + # Fallback for weird date format "January 23 at 2023 at 3PM UTC", seen 2023-12 + if valid_to and not utc_valid_to: + with contextlib.suppress(ValueError): + utc_valid_to = datetime.strptime( + valid_to, + "%B %d at %Y at %I%p", + ).replace( + tzinfo=timezone.utc, + ) return Offer( source=UbisoftGamesScraper.get_source(),