Skip to content

Commit

Permalink
Fixes AnthonyBloomer#12. Replace user agent with a known-working user agent. Replace urllib3 with requests, to automatically maintain the cookies necessary to avoid 403.
Browse files: browse the repository at this point in the history
  • Loading branch information
qthequartermasterman committed Jun 23, 2022
1 parent 1cb0c6e commit 240322d
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 20 deletions.
31 changes: 14 additions & 17 deletions rcp/rcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,24 @@
import csv
from typing import List, Dict, Union, Any

import urllib3
import requests
from bs4 import BeautifulSoup

from fake_useragent import UserAgent

base = "https://www.realclearpolitics.com"

ua = UserAgent()


def _html(url: str) -> BeautifulSoup:
"""
Get the poll HTML.
:param url: The url of the poll.
:return: BeautifulSoup
"""
with urllib3.PoolManager() as manager:
res = manager.request("GET", url, headers={"User-Agent": ua.chrome})
if res.status != 200:
raise Exception(res.status)
soup = BeautifulSoup(res.data, "html.parser")
with requests.Session() as manager:
user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
res = manager.request("GET", url, headers={"User-Agent": user_agent})
if res.status_code != 200:
raise Exception(res.status_code)
soup = BeautifulSoup(res.text, "html.parser")
return soup


Expand All @@ -45,10 +42,10 @@ def create_table(p: list, html_format: bool = False) -> str:


def get_polls(
url: str = "%s/epolls/latest_polls/" % base,
candidate: str = None,
pollster: str = None,
state: str = None,
url: str = "%s/epolls/latest_polls/" % base,
candidate: str = None,
pollster: str = None,
state: str = None,
) -> List[Dict[str, Union[str, Any]]]:
"""
:param state: The state to get polling data for.
Expand Down Expand Up @@ -77,9 +74,9 @@ def get_polls(
n = col.find("td", {"class": "lp-poll"}).find("a").text

if (
(candidate and candidate.lower() not in t.lower())
or (pollster and pollster.lower() not in n.lower())
or (state and state.lower() not in t.lower())
(candidate and candidate.lower() not in t.lower())
or (pollster and pollster.lower() not in n.lower())
or (state and state.lower() not in t.lower())
):
continue

Expand Down
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
beautifulsoup4==4.9.1
bs4==0.0.1
fake-useragent==0.1.11
PTable==0.9.2
soupsieve==2.0.1
urllib3==1.25.9
requests==2.28.0
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def run(self):
url="https://github.com/AnthonyBloomer/rcp",
install_requires=[
'beautifulsoup4',
'urllib3',
'requests',
'PTable'
],
classifiers=[
Expand Down

0 comments on commit 240322d

Please sign in to comment.