Skip to content

Commit

Permalink
fix malformed urls
Browse files Browse the repository at this point in the history
  • Loading branch information
honzajavorek committed Jan 9, 2025
1 parent 2604078 commit 82a2699
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 1 deletion.
10 changes: 9 additions & 1 deletion jg/plucker/meetups_pehapkari/spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def parse_event(

self.logger.info(f"Event: {event.summary} {event.begin}")
try:
url = html.fromstring(event.description).xpath("//a/@href")[-1]
url = fix_url(html.fromstring(event.description).xpath("//a/@href")[-1])
except IndexError:
url = self.default_event_url
return Meetup(
Expand All @@ -58,3 +58,11 @@ def parse_event(
series_org="komunita kolem PHP",
series_url="https://www.pehapkari.cz/",
)


def fix_url(url: str) -> str:
    """Return *url* as an absolute HTTP(S) URL.

    Links taken from event descriptions sometimes lack a scheme
    (``www.example.com/...``). Such links are prefixed with ``https://``;
    links that already start with ``http://`` or ``https://`` are returned
    as they are, apart from surrounding whitespace being removed.

    Args:
        url: Raw link value, typically an ``href`` attribute.

    Returns:
        The whitespace-stripped URL, guaranteed to start with an HTTP(S)
        scheme.

    Raises:
        ValueError: If the value neither has an HTTP(S) scheme nor starts
            with ``www.``.
    """
    # Attribute values may carry stray whitespace or newlines around the link.
    candidate = url.strip()
    # URL schemes and host names are case-insensitive, so compare lowercased.
    lowered = candidate.lower()
    # Require the full "scheme://" prefix: a bare "http" prefix would also
    # accept scheme-less hosts such as "httpbin.org".
    if lowered.startswith(("http://", "https://")):
        return candidate
    if lowered.startswith("www."):
        return f"https://{candidate}"
    raise ValueError(f"Invalid URL: {url}")
20 changes: 20 additions & 0 deletions tests/test_meetups_pehapkari_spider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pytest

from jg.plucker.meetups_pehapkari.spider import fix_url


@pytest.mark.parametrize(
    ("url", "expected"),
    [
        # A scheme-less "www." link gets an https:// prefix.
        pytest.param(
            "www.facebook.com/events/1118251933124168",
            "https://www.facebook.com/events/1118251933124168",
        ),
        # A link that already has a scheme is returned unchanged.
        pytest.param(
            "https://www.meetup.com/pra%C5%BEske-srazy-p%C5%99atel-php-pehapkari-cz/events/305454246/",
            "https://www.meetup.com/pra%C5%BEske-srazy-p%C5%99atel-php-pehapkari-cz/events/305454246/",
        ),
    ],
)
def test_fix_url(url: str, expected: str):
    """Check that ``fix_url`` turns each input link into the expected URL."""
    fixed = fix_url(url)
    assert fixed == expected

0 comments on commit 82a2699

Please sign in to comment.