-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathlogo_scrape.py
30 lines (25 loc) · 961 Bytes
/
logo_scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
##################################################
# Feb. 2017
#
# Web scraping to get all NCAA Division I college team logos
# (taken from the Fox Sports college-basketball teams page).
# How can we possibly get logos for all the college teams?!
# logo_scrape.py will get 'em for you!
##################################################
import pandas as pd
from datetime import datetime, timedelta
import urllib2
from bs4 import BeautifulSoup
import requests
import urllib2
import re
# Scrape the team listing page, then download every team logo it references
# and save each one as static/img/<team name>.png.
url = "http://www.foxsports.com/college-basketball/teams"

# Seconds to wait for any single HTTP response, so a stalled connection
# cannot hang the script forever (requests applies no timeout by default).
REQUEST_TIMEOUT = 30

req = requests.get(url, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of parsing an error page and
# silently finding no logos.
req.raise_for_status()
soup = BeautifulSoup(req.text, 'html.parser')

logos = soup.find_all('img', attrs={'class': "wisfb_logoImage"})
team_divs = soup.find_all('div', attrs={'class': "wisbb_fullTeamStacked"})
# The text of the first <span> inside each team div is used as the file name.
team = [x.select('span')[0].text for x in team_divs]

# Logos and names are matched purely by position, so differing counts mean
# some files may be mislabelled or skipped; make that visible.
if len(logos) != len(team):
    print("Warning: found %d logos but %d team names; pairing by position."
          % (len(logos), len(team)))

# zip() stops at the shorter list, so a count mismatch no longer ends in an
# IndexError part-way through the downloads.
for logo, team_name in zip(logos, team):
    # Keep only what follows the last "src=" in the attribute value; when the
    # value contains no "src=" it is used unchanged.
    png_link = logo["src"].split("src=")[-1]
    response = requests.get(png_link, timeout=REQUEST_TIMEOUT)
    # Do not write an HTTP error body to disk as if it were an image.
    response.raise_for_status()
    download_img = response.content
    # NOTE: the static/img directory must already exist; open() does not
    # create it. Binary mode because the payload is raw image bytes.
    with open('static/img/%s.png' % team_name, 'wb') as f:
        f.write(download_img)