From 3746826de3b57dd7c14235bb6b8709623a603a3a Mon Sep 17 00:00:00 2001 From: kart2004 <“karthikprakash999@gmail.com”> Date: Sat, 18 May 2024 21:34:28 +0530 Subject: [PATCH 1/3] Codeforces_Contests --- dev-documentation.md | 6 +-- src/scrape_up/codeforces/__init__.py | 4 ++ src/scrape_up/codeforces/contests.py | 81 ++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 3 deletions(-) create mode 100644 src/scrape_up/codeforces/__init__.py create mode 100644 src/scrape_up/codeforces/contests.py diff --git a/dev-documentation.md b/dev-documentation.md index c4300c7f..cca33728 100644 --- a/dev-documentation.md +++ b/dev-documentation.md @@ -1635,13 +1635,12 @@ olympics = Olympics() ## Codeforces -Create an instance of `Users` class +Create an instance of `Codeforces` class ```python from scrape_up import codeforces -codeforces_user = codeforces.Users(username="tourist") -codeforces_user.get_user_data() +codeforces = Codeforces() ``` Methods @@ -1650,6 +1649,7 @@ Methods | Methods | Details | | -------------------------- | ---------------------------------- | | `.get_user_data(username)` | Fetches user data from CodeForces. | +| `get_contests()` | Returns information on contests. | ``` --- diff --git a/src/scrape_up/codeforces/__init__.py b/src/scrape_up/codeforces/__init__.py new file mode 100644 index 00000000..8655dc03 --- /dev/null +++ b/src/scrape_up/codeforces/__init__.py @@ -0,0 +1,4 @@ +from .user import Users +from .contests import Contest + +__all__ = ["Users", "Contest"] \ No newline at end of file diff --git a/src/scrape_up/codeforces/contests.py b/src/scrape_up/codeforces/contests.py new file mode 100644 index 00000000..22f54a0b --- /dev/null +++ b/src/scrape_up/codeforces/contests.py @@ -0,0 +1,81 @@ +from bs4 import BeautifulSoup + +from scrape_up.config.request_config import RequestConfig, get + +class Contest: + """ + First, create an object of class `Contest` + + ```python + codeforces = Contest() + ``` + + | Methods | Details | + | ---------------------------- | ----------------------------------------------------------------------------------------- | + | `get_contests()` | Returns information on active contests like title, start, and duration | + """ + def __init__(self, *, config: RequestConfig = RequestConfig()): + headers = {"User-Agent": "scrapeup"} + self.config = config + if self.config.headers == {}: + self.config.set_headers(headers) + + def get_contests(self): + """ + Method to fetch the list of active contests on Codeforces using web scraping. + + Example + ------- + ```python + codeforces = Contest() + codeforces.get_contests() + ``` + + Returns + ------- + { + "data": [ + { + "name": "Codeforces Round #731 (Div. 3)", + "start": "Aug/08/2021 17:35", + "length": "2 hrs" + }, + { + "name": "Codeforces Round 946 (Div. 3)", + "start": "05/20/2024 17:35", + "length": "02:15", + "status": "upcoming" + } + ], + "message": "Found contest list" + } + """ + codeforces_url = "https://codeforces.com/contests" + response = get(codeforces_url, self.config) + + if response.status_code != 200: + return {"data": None, "message": "Can not load Contest"} + + soup = BeautifulSoup(response.text, "html.parser") + contest_list = [] + + upcoming_contests = soup.find("div", {"class": "datatable"}).find_all("tr") + for contest in upcoming_contests: + columns = contest.find_all("td") + if len(columns) == 6: # The number of columns in the table row for contests + name = columns[0].text.strip() + start_time_str = columns[2].text.strip() + duration_str = columns[3].text.strip() + + name = ' '.join(line.strip() for line in name.splitlines() if line.strip()) + name = name.replace('Enter »', '').strip() + + contest_list.append({ + "name": name, + "start": start_time_str, + "length": duration_str, + }) + + return {"data": contest_list, "message": "Found contest list"} + + From 460521a40ac37188b8aa64be78c959a960dfa4eb Mon Sep 17 00:00:00 2001 From: kart2004 <“karthikprakash999@gmail.com”> Date: Sat, 18 May 2024 22:18:05 +0530 Subject: [PATCH 2/3] Codeforces_schedule --- src/scrape_up/codeforces/contests.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/scrape_up/codeforces/contests.py b/src/scrape_up/codeforces/contests.py index 22f54a0b..367891e4 100644 --- a/src/scrape_up/codeforces/contests.py +++ b/src/scrape_up/codeforces/contests.py @@ -1,4 +1,5 @@ from bs4 import BeautifulSoup +import json from scrape_up.config.request_config import RequestConfig, get @@ -54,7 +55,7 @@ def get_contests(self): response = get(codeforces_url, self.config) if response.status_code != 200: - return {"data": None, "message": "Can not load Contest"} + json.dumps({"data": None, "message": "Cannot load Contest"}) soup = BeautifulSoup(response.text, "html.parser") contest_list = [] @@ -76,6 +77,5 @@ def get_contests(self): "length": duration_str, }) - return {"data": contest_list, "message": "Found contest list"} - + return json.dumps({"data": contest_list, "message": "Found contest list"}) From c11f4cadd730716f50f46aa75ba650c25ed8439d Mon Sep 17 00:00:00 2001 From: Nikhil Raj Date: Sun, 19 May 2024 13:09:43 +0530 Subject: [PATCH 3/3] Check --- dev-documentation.md | 7 +--- src/scrape_up/ambitionBox/company.py | 3 +- src/scrape_up/codeforces/__init__.py | 2 +- src/scrape_up/codeforces/contests.py | 57 +++++++++++++++------------- src/scrape_up/indiantrekking/trek.py | 2 +- src/scrape_up/lichess/lichess.py | 5 +-- 6 files changed, 37 insertions(+), 39 deletions(-) diff --git a/dev-documentation.md b/dev-documentation.md index cca33728..2e4b84a9 100644 --- a/dev-documentation.md +++ b/dev-documentation.md @@ -1649,7 +1649,7 @@ Methods | Methods | Details | | -------------------------- | ---------------------------------- | | `.get_user_data(username)` | Fetches user data from CodeForces. | -| `get_contests()` | Returns information on contests. | +| `get_contests()` | Returns information on contests. | ``` --- @@ -1745,9 +1745,7 @@ result = steam.ScrapeGames(n0Games=5, tags=["Discounts", "F2P"]) | ----------------------------- | ------------------------------------------- | | `.ScrapeGames(n0Games, tags)` | Scrapes game data for each specified filter | - -------- - +--- ## Lichess @@ -1853,4 +1851,3 @@ trek=Indiantrekking("hidden-lakes-of-kashmir") | `outline_day_to_day_itinerary` | returns the ouline of the day to day itinerary | --- - diff --git a/src/scrape_up/ambitionBox/company.py b/src/scrape_up/ambitionBox/company.py index f54358ec..30f649f9 100644 --- a/src/scrape_up/ambitionBox/company.py +++ b/src/scrape_up/ambitionBox/company.py @@ -1,4 +1,3 @@ - # import requests # from bs4 import BeautifulSoup @@ -78,4 +77,4 @@ # if __name__ == "__main__": # c = Comapiens(10) -# c.scrape_companies() \ No newline at end of file +# c.scrape_companies() diff --git a/src/scrape_up/codeforces/__init__.py b/src/scrape_up/codeforces/__init__.py index 8655dc03..e3489d44 100644 --- a/src/scrape_up/codeforces/__init__.py +++ b/src/scrape_up/codeforces/__init__.py @@ -1,4 +1,4 @@ from .user import Users from .contests import Contest -__all__ = ["Users", "Contest"] \ No newline at end of file +__all__ = ["Users", "Contest"] diff --git a/src/scrape_up/codeforces/contests.py b/src/scrape_up/codeforces/contests.py index 367891e4..ade341a4 100644 --- a/src/scrape_up/codeforces/contests.py +++ b/src/scrape_up/codeforces/contests.py @@ -1,8 +1,8 @@ from bs4 import BeautifulSoup import json - from scrape_up.config.request_config import RequestConfig, get + class Contest: """ First, create an object of class `Contest` @@ -15,6 +15,7 @@ class Contest: | ---------------------------- | ----------------------------------------------------------------------------------------- | | `get_contests()` | Returns information on active contests like title, start, and duration | """ + def __init__(self, *, config: RequestConfig = RequestConfig()): headers = {"User-Agent": "scrapeup"} self.config = config @@ -36,46 +37,48 @@ def get_contests(self): ------- { "data": [ - { - "name": "Codeforces Round #731 (Div. 3)", - "start": "Aug/08/2021 17:35", - "length": "2 hrs" - }, { - "name": "Codeforces Round 946 (Div. 3)", - "start": "05/20/2024 17:35", - "length": "02:15", - "status": "upcoming" - } + "name": "Codeforces Round #731 (Div. 3)", + "start": "Aug/08/2021 17:35", + "length": "2 hrs" + }, + ... ], "message": "Found contest list" } """ codeforces_url = "https://codeforces.com/contests" response = get(codeforces_url, self.config) - + if response.status_code != 200: json.dumps({"data": None, "message": "Cannot load Contest"}) soup = BeautifulSoup(response.text, "html.parser") contest_list = [] - upcoming_contests = soup.find("div", {"class": "datatable"}).find_all("tr") - for contest in upcoming_contests: - columns = contest.find_all("td") - if len(columns) == 6: # The number of columns in the table row for contests - name = columns[0].text.strip() - start_time_str = columns[2].text.strip() - duration_str = columns[3].text.strip() + try: - name = ' '.join(line.strip() for line in name.splitlines() if line.strip()) - name = name.replace('Enter »', '').strip() + upcoming_contests = soup.find("div", {"class": "datatable"}).find_all("tr") + for contest in upcoming_contests: + columns = contest.find_all("td") + if len(columns) == 6: + name = columns[0].text.strip() + start_time_str = columns[2].text.strip() + duration_str = columns[3].text.strip() - contest_list.append({ - "name": name, - "start": start_time_str, - "length": duration_str, - }) + name = " ".join( + line.strip() for line in name.splitlines() if line.strip() + ) + name = name.replace("Enter »", "").strip() - return json.dumps({"data": contest_list, "message": "Found contest list"}) + contest_list.append( + { + "name": name, + "start": start_time_str, + "length": duration_str, + } + ) + return contest_list + except Exception: + return None diff --git a/src/scrape_up/indiantrekking/trek.py b/src/scrape_up/indiantrekking/trek.py index 159009f6..58cb3c99 100644 --- a/src/scrape_up/indiantrekking/trek.py +++ b/src/scrape_up/indiantrekking/trek.py @@ -59,4 +59,4 @@ def outline_day_to_day_itinerary(self): outline = self.soup.find("div", class_="itinerary").text return outline except: - return None \ No newline at end of file + return None diff --git a/src/scrape_up/lichess/lichess.py b/src/scrape_up/lichess/lichess.py index 77710ada..cd95d1f0 100644 --- a/src/scrape_up/lichess/lichess.py +++ b/src/scrape_up/lichess/lichess.py @@ -41,7 +41,7 @@ def __fetch_page_games(self, page_num): game_list.append(game_info) return game_list - def fetch_games(self,start_page=1,end_page=4): + def fetch_games(self, start_page=1, end_page=4): """ Fetch all the games data for the specified username. @@ -53,7 +53,7 @@ def fetch_games(self,start_page=1,end_page=4): ```python # Default usage: games = scraper.fetch_games() - + # Custom usage: games = scraper.fetch_games(start_page=5, end_page=8) ``` @@ -130,4 +130,3 @@ def __get_pgn(self, game_data): pgn_request = requests.get(f"https://lichess.org{gameUrl}")._content parsed_pgn = BeautifulSoup(pgn_request, "lxml") return parsed_pgn.find("div", {"class": "pgn"}).text - \ No newline at end of file