Skip to content

Commit

Permalink
Merge pull request #16 from datasets/update-script
Browse files: browse the repository at this point in the history
[fix][s] Changing fetching from urllib to requests
  • Loading branch information
Mikanebu authored Feb 3, 2025
2 parents 9e15c58 + c65a0ce commit c1e991a
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 9 deletions.
16 changes: 7 additions & 9 deletions scripts/process.py
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@
import csv
import chardet
import datetime
import requests
import urllib.request

from bs4 import BeautifulSoup
@@ -24,16 +25,13 @@
]

def fetch_league_links(league):
"""Fetch CSV file links for a league."""
headers = {'User-Agent': generate_user_agent(device_type="desktop", os=('mac', 'linux'))}
req = urllib.request.Request(BASE_URL + league['path'], headers=headers)
with urllib.request.urlopen(req) as response:
html = response.read().decode()
soup = BeautifulSoup(html, 'html.parser')
file_links = soup.find_all('a', href=re.compile(r"mmz4281"))
for link in file_links:
if league['key'] + '.csv' in link['href']:
league['links'].append(link['href'])
response = requests.get(BASE_URL + league['path'], headers=headers, verify=False) # Disable SSL verification
soup = BeautifulSoup(response.text, 'html.parser')
file_links = soup.find_all('a', href=re.compile(r"mmz4281"))
for link in file_links:
if league['key'] + '.csv' in link['href']:
league['links'].append(link['href'])

def download_and_save_data(league):
"""Download data from league links and save in specified format."""
Expand Down
1 change: 1 addition & 0 deletions scripts/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
bs4==0.0.2
requests==2.32.3
user_agent==0.1.10
dataflows==0.5.5
chardet==5.2.0
Expand Down

0 comments on commit c1e991a

Please sign in to comment.