-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathscrape.py
41 lines (39 loc) · 1.13 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import requests
import re
import os.path
from os import path
import bs4
from bs4 import BeautifulSoup
from datetime import datetime
def getContents(url="https://www.mohfw.gov.in/", timeout=10):
    """Download a page and return its body as text.

    Args:
        url: Address to request. Defaults to "https://www.mohfw.gov.in/".
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body as a string when the server answers with HTTP 200,
        otherwise an empty string.

    Raises:
        requests.RequestException: Propagated unchanged from ``requests.get``
            (connection failure, timeout, and so on).
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code == 200:
        return response.text
    # Any non-200 answer is reported to the caller as "no content".
    return ""
def scrape_now():
    """Fetch the page via ``getContents`` and extract its statistics.

    Two parts of the HTML are read:

    * the text of every ``<strong>`` inside ``div.site-stats-count``;
    * the rows of the first ``<tbody>`` inside ``section#state-data`` whose
      first cell is numeric. Cells 1-4 of such a row are taken as the state
      name, confirmed count, cured count and death count.

    Missing elements (for example when ``getContents`` returned an empty
    string) yield empty lists instead of raising.

    Returns:
        A tuple ``(stats_list, state_list, confirmed_list, cured_list,
        death_list)``. ``stats_list`` and ``state_list`` hold strings; the
        other three hold ints. The four per-state lists always have equal
        length.

    Raises:
        ValueError: If a count cell of an accepted row is not a valid integer.
    """
    stats_list = []
    state_list = []
    confirmed_list = []
    cured_list = []
    death_list = []

    soup = BeautifulSoup(getContents(), 'html.parser')

    # Headline counters. ``find`` returns None when the div is absent.
    stats = soup.find("div", {"class": "site-stats-count"})
    if stats is not None:
        stats_list = [strong.getText() for strong in stats.find_all("strong")]

    # Per-state table: only the first <tbody> of the section is used.
    states_data = soup.find("section", {"id": "state-data"})
    tables = states_data.find_all('tbody') if states_data is not None else []
    rows = tables[0].find_all("tr") if tables else []

    for row in rows:
        col = row.find_all("td")
        # Skip rows that lack the five expected cells or whose first cell is
        # not a number.
        if len(col) < 5 or not col[0].getText().isnumeric():
            continue
        # Convert all counts before appending so a bad cell cannot leave the
        # lists with different lengths.
        confirmed = int(col[2].getText())
        cured = int(col[3].getText())
        deaths = int(col[4].getText())
        state_list.append(col[1].getText())
        confirmed_list.append(confirmed)
        cured_list.append(cured)
        death_list.append(deaths)

    return stats_list, state_list, confirmed_list, cured_list, death_list