Skip to content

Commit

Permalink
Fix category parsing due to artvee.com HTML changes
Browse files (browse the repository at this point in the history)
  • Loading branch information
zduclos committed Jul 3, 2022
1 parent b07fc1e commit a4fb8ec
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 9 deletions.
2 changes: 1 addition & 1 deletion artvee_scraper/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-__version__ = "2.0.0"
+__version__ = "2.0.1"
13 changes: 5 additions & 8 deletions artvee_scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ def start(self):
logger.info("Processing %s (%d/%d)",
category, page, page_count)
page_url = f"https://www.artvee.com/c/{category}/page/{page}/?per_page={ArtveeScraper._ITEMS_PER_PAGE}"
-artwork_list = ArtveeScraper._scrape_artwork_data(page_url)
+artwork_list = ArtveeScraper._scrape_artwork_data(
+page_url, category.value.capitalize())

results = self.workers.map(self._worker_task, artwork_list)

Expand Down Expand Up @@ -179,7 +180,7 @@ def _num_pages_for_category(category: CategoryType) -> int:
return 0

@staticmethod
-def _scrape_artwork_data(page_url: str) -> List[Artwork]:
+def _scrape_artwork_data(page_url: str, category: str) -> List[Artwork]:
scraped_artwork = []

try:
Expand All @@ -195,7 +196,7 @@ def _scrape_artwork_data(page_url: str) -> List[Artwork]:
)

for meta in all_metadata_html:
-if artwork := ArtveeScraper._parse_metadata_html(meta):
+if artwork := ArtveeScraper._parse_metadata_html(meta, category):
scraped_artwork.append(artwork)

else:
Expand All @@ -214,15 +215,11 @@ def _scrape_artwork_data(page_url: str) -> List[Artwork]:
return scraped_artwork

@staticmethod
-def _parse_metadata_html(metadata_html: Tag) -> Optional[Artwork]:
+def _parse_metadata_html(metadata_html: Tag, category: str) -> Optional[Artwork]:
try:
img_details = metadata_html.find("h3", {"class": "product-title"})

url = img_details.a.get("href")
title = img_details.get_text(strip=True)
-category = metadata_html.find(
-"div", {"class": "woodmart-product-cats"}
-).get_text(strip=True)

artwork = Artwork(url, title, category)

Expand Down

0 comments on commit a4fb8ec

Please sign in to comment.