Skip to content

Commit

Permalink
fix: update amazon page layout (#297)
Browse files Browse the repository at this point in the history
  • Loading branch information
eikowagenknecht authored Dec 7, 2023
1 parent 2fac066 commit a884a0e
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 64 deletions.
40 changes: 26 additions & 14 deletions src/lootscraper/scraper/amazon_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
from typing import TYPE_CHECKING

import schedule
from playwright.async_api import Error, Locator
from playwright.async_api import Error, Locator, TimeoutError

from lootscraper.browser import get_new_page
from lootscraper.common import OfferDuration, Source
from lootscraper.scraper.scraper_base import RawOffer, Scraper

Expand All @@ -32,7 +33,7 @@ def get_duration() -> OfferDuration:

@staticmethod
def get_schedule() -> list[schedule.Job]:
return [schedule.every(30).minutes]
return [schedule.every(60).minutes]

def offers_expected(self) -> bool:
return True
Expand All @@ -58,12 +59,6 @@ async def read_base_raw_offer(
if title is None:
raise ValueError("Couldn't find title.")

valid_to = await element.locator(
".item-card__availability-date p",
).text_content()
if valid_to is None:
raise ValueError(f"Couldn't find valid to for {title}.")

img_url = await element.locator(
'[data-a-target="card-image"] img',
).get_attribute("src")
Expand All @@ -73,19 +68,36 @@ async def read_base_raw_offer(
url = BASE_URL

try:
path = await element.locator(
'[data-a-target="learn-more-card"]',
).get_attribute("href", timeout=500)
path = await element.get_attribute("href", timeout=500)
if path is not None and not path.startswith("http"):
url += path
except Error:
# Some offers are claimed on site and don't have a specific path.
# That's fine.
pass
raise ValueError(f"Couldn't find detail page for {title}.") from None

try:
valid_to = await self.read_date_from_details_page(url)
except TimeoutError:
# Some offers just have no date. That's fine.
valid_to = None

return AmazonRawOffer(
title=title,
valid_to=valid_to,
url=url,
img_url=img_url,
)

async def read_date_from_details_page(
    self,
    url: str,
) -> str:
    """Open an offer's detail page and return its availability date text.

    A fresh page is opened in ``self.context`` for the lookup, so the page
    the offer element belongs to is not navigated away from.

    Args:
        url: Address of the offer's detail page.

    Returns:
        The text content of the second ``span`` inside the
        ``.availability-date`` element, unmodified.

    Raises:
        ValueError: If the element's text content is ``None``.

    Errors raised by Playwright while navigating or locating the element
    (for example timeouts) are not handled here and propagate to the
    caller.
    """
    async with get_new_page(self.context) as details_page:
        # Navigation is capped at 30 seconds (value is in milliseconds).
        await details_page.goto(url, timeout=30000)

        date_locator = details_page.locator(
            ".availability-date span:nth-child(2)",
        )
        date_text = await date_locator.text_content()
        if date_text is None:
            raise ValueError("Couldn't find date.")

    return date_text
55 changes: 30 additions & 25 deletions src/lootscraper/scraper/amazon_games.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

import logging
from datetime import date, datetime, time, timedelta, timezone
from datetime import datetime, timedelta, timezone
from typing import TYPE_CHECKING

from lootscraper.common import OfferType
Expand All @@ -25,7 +25,7 @@ def get_offer_handlers(self, page: Page) -> list[OfferHandler]:
OfferHandler(
page.locator(
'[data-a-target="offer-list-FGWP_FULL"] '
'[data-a-target="item-card"]',
" .item-card__action > a:first-child",
),
self.read_raw_offer,
self.normalize_offer,
Expand All @@ -35,10 +35,32 @@ def get_offer_handlers(self, page: Page) -> list[OfferHandler]:
async def page_loaded_hook(self, page: Page) -> None:
    """Prepare a freshly loaded page so that all offers are present.

    First scrolls the ``root`` element to the bottom, then advances the
    offer carousel until its "next" arrow reports being disabled.

    Args:
        page: The page that has just finished loading.
    """
    await Scraper.scroll_element_to_bottom(page, "root")

    # Scroll through the carousel to load all offers. The number of clicks
    # is capped at 10 so a never-disabled arrow cannot loop forever.
    carousel_next = page.locator(
        '[data-a-target="grid-carousel-next-arrow-container"]',
    )
    clicks_left = 10
    while clicks_left > 0 and not await carousel_next.is_disabled():
        await carousel_next.click()
        clicks_left -= 1

async def read_raw_offer(
    self,
    element: Locator,
) -> AmazonRawOffer:
    """Read the raw offer data for a single offer element.

    Before reading, the carousel on the element's page is advanced to the
    right again (at most 10 clicks, stopping once the "next" arrow is
    disabled) in case the scroll position got lost.

    Args:
        element: Locator pointing at the offer to read.

    Returns:
        Whatever ``read_base_raw_offer`` produces for ``element``.
    """
    # Rescroll to the right again (if it got lost)
    carousel_next = element.page.locator(
        '[data-a-target="grid-carousel-next-arrow-container"]',
    )
    clicks_left = 10
    while clicks_left > 0 and not await carousel_next.is_disabled():
        await carousel_next.click()
        clicks_left -= 1

    raw_offer = await self.read_base_raw_offer(element)
    return raw_offer

def normalize_offer(self, raw_offer: RawOffer) -> Offer:
Expand Down Expand Up @@ -80,45 +102,28 @@ def normalize_offer(self, raw_offer: RawOffer) -> Offer:
if raw_offer.valid_to:
logger.debug(f"Found date: {raw_offer.valid_to} for {raw_offer.title}")
try:
raw_date = raw_offer.valid_to.removeprefix("Ends ").lower()
if raw_date == "today":
raw_date = raw_offer.valid_to.removeprefix("Ends ")
if raw_date.lower() == "today":
parsed_date = datetime.now(tz=timezone.utc).replace(
hour=0,
minute=0,
second=0,
)
elif raw_date == "tomorrow":
elif raw_date.lower() == "tomorrow":
parsed_date = datetime.now(tz=timezone.utc).replace(
hour=0,
minute=0,
second=0,
) + timedelta(days=1)
else:
parsed_date = datetime.now(tz=timezone.utc).replace(
parsed_date = datetime.strptime(raw_date, "%b %d, %Y").replace(
tzinfo=timezone.utc,
hour=0,
minute=0,
second=0,
) + timedelta(days=int(raw_date.split(" ")[1]))

# Correct the year
guessed_end_date = date(
datetime.now(tz=timezone.utc).date().year,
parsed_date.month,
parsed_date.day,
)
yesterday = datetime.now(tz=timezone.utc).date() - timedelta(days=1)
if guessed_end_date < yesterday:
guessed_end_date = guessed_end_date.replace(
year=guessed_end_date.year + 1,
)

# Add 1 day because of the notation
# ("Ends today" means "Ends at 00:00:00 the next day")
end_date = datetime.combine(
guessed_end_date + timedelta(days=1),
time.min,
tzinfo=timezone.utc,
)
end_date = parsed_date
except (ValueError, IndexError):
logger.warning(f"Date parsing failed for {raw_offer.title}")

Expand Down
33 changes: 8 additions & 25 deletions src/lootscraper/scraper/amazon_loot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
from dataclasses import dataclass
from datetime import date, datetime, time, timedelta, timezone
from datetime import datetime, timedelta, timezone
from typing import TYPE_CHECKING

from lootscraper.common import OfferType
Expand Down Expand Up @@ -31,7 +31,7 @@ def get_offer_handlers(self, page: Page) -> list[OfferHandler]:
OfferHandler(
page.locator(
'[data-a-target="offer-list-IN_GAME_LOOT"] '
'[data-a-target="item-card"]',
" .item-card__action > a:first-child",
),
self.read_raw_offer,
self.normalize_offer,
Expand Down Expand Up @@ -104,45 +104,28 @@ def normalize_offer(self, raw_offer: RawOffer) -> Offer:
if raw_offer.valid_to:
logger.debug(f"Found date: {raw_offer.valid_to} for {raw_offer.title}")
try:
raw_date = raw_offer.valid_to.removeprefix("Ends ").lower()
if raw_date == "today":
raw_date = raw_offer.valid_to.removeprefix("Ends ")
if raw_date.lower() == "today":
parsed_date = datetime.now(tz=timezone.utc).replace(
hour=0,
minute=0,
second=0,
)
elif raw_date == "tomorrow":
elif raw_date.lower() == "tomorrow":
parsed_date = datetime.now(tz=timezone.utc).replace(
hour=0,
minute=0,
second=0,
) + timedelta(days=1)
else:
parsed_date = datetime.now(tz=timezone.utc).replace(
parsed_date = datetime.strptime(raw_date, "%b %d, %Y").replace(
tzinfo=timezone.utc,
hour=0,
minute=0,
second=0,
) + timedelta(days=int(raw_date.split(" ")[1]))

# Correct the year
guessed_end_date = date(
datetime.now(tz=timezone.utc).date().year,
parsed_date.month,
parsed_date.day,
)
yesterday = datetime.now(tz=timezone.utc).date() - timedelta(days=1)
if guessed_end_date < yesterday:
guessed_end_date = guessed_end_date.replace(
year=guessed_end_date.year + 1,
)

# Add 1 day because of the notation
# ("Ends today" means "Ends at 00:00:00 the next day")
end_date = datetime.combine(
guessed_end_date + timedelta(days=1),
time.min,
tzinfo=timezone.utc,
)
end_date = parsed_date
except (ValueError, IndexError):
logger.warning(f"Date parsing failed for {raw_offer.title}")

Expand Down

0 comments on commit a884a0e

Please sign in to comment.