diff --git a/src/lootscraper/scraper/amazon_base.py b/src/lootscraper/scraper/amazon_base.py index 96a9d1ee..cef4e053 100644 --- a/src/lootscraper/scraper/amazon_base.py +++ b/src/lootscraper/scraper/amazon_base.py @@ -4,8 +4,9 @@ from typing import TYPE_CHECKING import schedule -from playwright.async_api import Error, Locator +from playwright.async_api import Error, Locator, TimeoutError +from lootscraper.browser import get_new_page from lootscraper.common import OfferDuration, Source from lootscraper.scraper.scraper_base import RawOffer, Scraper @@ -32,7 +33,7 @@ def get_duration() -> OfferDuration: @staticmethod def get_schedule() -> list[schedule.Job]: - return [schedule.every(30).minutes] + return [schedule.every(60).minutes] def offers_expected(self) -> bool: return True @@ -58,12 +59,6 @@ async def read_base_raw_offer( if title is None: raise ValueError("Couldn't find title.") - valid_to = await element.locator( - ".item-card__availability-date p", - ).text_content() - if valid_to is None: - raise ValueError(f"Couldn't find valid to for {title}.") - img_url = await element.locator( '[data-a-target="card-image"] img', ).get_attribute("src") @@ -73,15 +68,17 @@ async def read_base_raw_offer( url = BASE_URL try: - path = await element.locator( - '[data-a-target="learn-more-card"]', - ).get_attribute("href", timeout=500) + path = await element.get_attribute("href", timeout=500) if path is not None and not path.startswith("http"): url += path except Error: - # Some offers are claimed on site and don't have a specific path. - # That's fine. - pass + raise ValueError(f"Couldn't find detail page for {title}.") from None + + try: + valid_to = await self.read_date_from_details_page(url) + except TimeoutError: + # Some offers just have no date. That's fine. + valid_to = None return AmazonRawOffer( title=title, @@ -89,3 +86,18 @@ async def read_base_raw_offer( url=url, img_url=img_url, ) + + async def read_date_from_details_page( + self, + url: str, + ) -> str: + async with get_new_page(self.context) as page: + await page.goto(url, timeout=30000) + + date = await page.locator( + ".availability-date span:nth-child(2)", + ).text_content() + if date is None: + raise ValueError("Couldn't find date.") + + return date diff --git a/src/lootscraper/scraper/amazon_games.py b/src/lootscraper/scraper/amazon_games.py index 8c28f6bd..c9202bc7 100644 --- a/src/lootscraper/scraper/amazon_games.py +++ b/src/lootscraper/scraper/amazon_games.py @@ -1,7 +1,7 @@ from __future__ import annotations import logging -from datetime import date, datetime, time, timedelta, timezone +from datetime import datetime, timedelta, timezone from typing import TYPE_CHECKING from lootscraper.common import OfferType @@ -25,7 +25,7 @@ def get_offer_handlers(self, page: Page) -> list[OfferHandler]: OfferHandler( page.locator( '[data-a-target="offer-list-FGWP_FULL"] ' - '[data-a-target="item-card"]', + " .item-card__action > a:first-child", ), self.read_raw_offer, self.normalize_offer, @@ -35,10 +35,32 @@ def get_offer_handlers(self, page: Page) -> list[OfferHandler]: async def page_loaded_hook(self, page: Page) -> None: await Scraper.scroll_element_to_bottom(page, "root") + # Scroll through the carousel to load all offers + for _ in range(10): + next_button = page.locator( + '[data-a-target="grid-carousel-next-arrow-container"]', + ) + + if await next_button.is_disabled(): + break + + await next_button.click() + async def read_raw_offer( self, element: Locator, ) -> AmazonRawOffer: + # Rescroll to the right again (if it got lost) + for _ in range(10): + next_button = element.page.locator( + '[data-a-target="grid-carousel-next-arrow-container"]', + ) + + if await next_button.is_disabled(): + break + + await next_button.click() + return await self.read_base_raw_offer(element) def normalize_offer(self, raw_offer: RawOffer) -> Offer: @@ -80,45 +102,28 @@ def normalize_offer(self, raw_offer: RawOffer) -> Offer: if raw_offer.valid_to: logger.debug(f"Found date: {raw_offer.valid_to} for {raw_offer.title}") try: - raw_date = raw_offer.valid_to.removeprefix("Ends ").lower() - if raw_date == "today": + raw_date = raw_offer.valid_to.removeprefix("Ends ") + if raw_date.lower() == "today": parsed_date = datetime.now(tz=timezone.utc).replace( hour=0, minute=0, second=0, ) - elif raw_date == "tomorrow": + elif raw_date.lower() == "tomorrow": parsed_date = datetime.now(tz=timezone.utc).replace( hour=0, minute=0, second=0, ) + timedelta(days=1) else: - parsed_date = datetime.now(tz=timezone.utc).replace( + parsed_date = datetime.strptime(raw_date, "%b %d, %Y").replace( + tzinfo=timezone.utc, hour=0, minute=0, second=0, - ) + timedelta(days=int(raw_date.split(" ")[1])) - - # Correct the year - guessed_end_date = date( - datetime.now(tz=timezone.utc).date().year, - parsed_date.month, - parsed_date.day, - ) - yesterday = datetime.now(tz=timezone.utc).date() - timedelta(days=1) - if guessed_end_date < yesterday: - guessed_end_date = guessed_end_date.replace( - year=guessed_end_date.year + 1, ) - # Add 1 day because of the notation - # ("Ends today" means "Ends at 00:00:00 the next day") - end_date = datetime.combine( - guessed_end_date + timedelta(days=1), - time.min, - tzinfo=timezone.utc, - ) + end_date = parsed_date except (ValueError, IndexError): logger.warning(f"Date parsing failed for {raw_offer.title}") diff --git a/src/lootscraper/scraper/amazon_loot.py b/src/lootscraper/scraper/amazon_loot.py index a98579d8..01444709 100644 --- a/src/lootscraper/scraper/amazon_loot.py +++ b/src/lootscraper/scraper/amazon_loot.py @@ -2,7 +2,7 @@ import logging from dataclasses import dataclass -from datetime import date, datetime, time, timedelta, timezone +from datetime import datetime, timedelta, timezone from typing import TYPE_CHECKING from lootscraper.common import OfferType @@ -31,7 +31,7 @@ def get_offer_handlers(self, page: Page) -> list[OfferHandler]: OfferHandler( page.locator( '[data-a-target="offer-list-IN_GAME_LOOT"] ' - '[data-a-target="item-card"]', + " .item-card__action > a:first-child", ), self.read_raw_offer, self.normalize_offer, @@ -104,45 +104,28 @@ def normalize_offer(self, raw_offer: RawOffer) -> Offer: if raw_offer.valid_to: logger.debug(f"Found date: {raw_offer.valid_to} for {raw_offer.title}") try: - raw_date = raw_offer.valid_to.removeprefix("Ends ").lower() - if raw_date == "today": + raw_date = raw_offer.valid_to.removeprefix("Ends ") + if raw_date.lower() == "today": parsed_date = datetime.now(tz=timezone.utc).replace( hour=0, minute=0, second=0, ) - elif raw_date == "tomorrow": + elif raw_date.lower() == "tomorrow": parsed_date = datetime.now(tz=timezone.utc).replace( hour=0, minute=0, second=0, ) + timedelta(days=1) else: - parsed_date = datetime.now(tz=timezone.utc).replace( + parsed_date = datetime.strptime(raw_date, "%b %d, %Y").replace( + tzinfo=timezone.utc, hour=0, minute=0, second=0, - ) + timedelta(days=int(raw_date.split(" ")[1])) - - # Correct the year - guessed_end_date = date( - datetime.now(tz=timezone.utc).date().year, - parsed_date.month, - parsed_date.day, - ) - yesterday = datetime.now(tz=timezone.utc).date() - timedelta(days=1) - if guessed_end_date < yesterday: - guessed_end_date = guessed_end_date.replace( - year=guessed_end_date.year + 1, ) - # Add 1 day because of the notation - # ("Ends today" means "Ends at 00:00:00 the next day") - end_date = datetime.combine( - guessed_end_date + timedelta(days=1), - time.min, - tzinfo=timezone.utc, - ) + end_date = parsed_date except (ValueError, IndexError): logger.warning(f"Date parsing failed for {raw_offer.title}")