-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtestapp.py
83 lines (73 loc) · 2.95 KB
/
testapp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import streamlit as st
from bs4 import BeautifulSoup
import requests
from joblib import Parallel, delayed
def get_news_text(url, timeout=10):
    """Fetch an article page and return a short preview of its text.

    The page at ``url`` is downloaded and parsed with BeautifulSoup's
    ``html.parser``. The first two ``<p>`` elements are taken, their text is
    stripped, and the pieces are joined with a newline in reverse document
    order (second paragraph first).

    Args:
        url: Address of the article page to download.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a single stalled connection would block the
            caller indefinitely.

    Returns:
        The joined paragraph text, or an empty string when the page contains
        no ``<p>`` elements.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    page = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page.text, 'html.parser')
    # Only the first two paragraphs are used as the preview.
    news_paragraphs = soup.find_all('p')[:2]
    return '\n'.join(p.text.strip() for p in reversed(news_paragraphs))
def scrape_page(url_pattern, tag_name, page_num, timeout=10):
    """Scrape one listing page and collect its news items.

    The listing URL is formed by appending ``page_num`` directly to
    ``url_pattern``. Every ``tag_name`` element on that page that contains a
    link is treated as a headline, and the linked article is fetched with
    ``get_news_text`` to obtain its preview text.

    Args:
        url_pattern: URL prefix to which the page number is appended.
        tag_name: Name of the HTML tag that wraps each headline.
        page_num: Page number appended to ``url_pattern``.
        timeout: Seconds to wait for the listing page before ``requests``
            gives up, so a stalled connection cannot hang the scrape.

    Returns:
        A list of ``(headline, link, news_text)`` tuples in page order.

    Raises:
        requests.RequestException: If the listing page or one of the linked
            articles cannot be fetched.
    """
    r = requests.get(f'{url_pattern}{page_num}', timeout=timeout)
    soup = BeautifulSoup(r.text, 'html.parser')
    results = []
    for item in soup.find_all(tag_name):
        anchor = item.a
        # A heading without an <a>, or an <a> without an href, used to abort
        # the whole page with TypeError/KeyError; skip such headings instead.
        link = anchor.get('href') if anchor is not None else None
        if not link:
            continue
        headline = item.text.strip()
        news_text = get_news_text(link)
        results.append((headline, link, news_text))
    return results
def scrape_category(url_pattern, tag_name, pages):
    """Scrape pages 1 through ``pages`` of a category and merge the results.

    One ``scrape_page`` call is scheduled per page number through joblib's
    ``Parallel`` (32 jobs, verbose level 100). The per-page lists are then
    concatenated into a single flat list.

    Args:
        url_pattern: URL prefix passed on to ``scrape_page``.
        tag_name: Headline tag name passed on to ``scrape_page``.
        pages: Number of the last page to scrape; numbering starts at 1.

    Returns:
        A flat list containing every item returned by ``scrape_page``.
    """
    page_numbers = range(1, pages + 1)
    tasks = (
        delayed(scrape_page)(url_pattern, tag_name, number)
        for number in page_numbers
    )
    runner = Parallel(n_jobs=32, verbose=100)
    per_page = runner(tasks)

    # Flatten the list of per-page lists into one list of items.
    news_data = []
    for page_items in per_page:
        news_data.extend(page_items)
    return news_data
def display_news(category_data):
    """Render scraped news items on the Streamlit page.

    Each item is shown as a bold white heading, a white paragraph with the
    preview text, a "Read more" markdown link, and a horizontal rule.

    Args:
        category_data: Iterable of ``(headline, link, news_text)`` tuples.
    """
    import html  # local import: only needed here for escaping

    for headline, link, news_text in category_data:
        # The headline and text come from scraped pages and are inserted into
        # raw HTML, so escape them: characters such as '<' and '&' must be
        # displayed literally rather than interpreted as markup.
        safe_headline = html.escape(headline)
        safe_text = html.escape(news_text)
        st.markdown(f"<h2 style='color: white; font-weight: bold;'>{safe_headline}</h2>", unsafe_allow_html=True)
        st.markdown(f"<p style='color: white;'>{safe_text}</p>", unsafe_allow_html=True)
        # Plain markdown link: no raw HTML is needed, so the unsafe flag is
        # not passed for this scraped value.
        st.write(f"[Read more]({link})")
        st.write("---")
def main():
    """Build the page: styling, title, category picker and the news list.

    Injects the page CSS, writes the title and subheader, lets the user pick
    a category, scrapes that category and displays the results. An error
    message is shown if the selection is not a known category.
    """
    # The CSS text is kept flush-left so the emitted string stays unchanged.
    page_css = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Nasalization&display=swap');
.title {
color: white;
font-size: 36px;
font-family: 'Nasalization', sans-serif;
font-weight: bold;
text-align: center;
margin-bottom: 20px;
}
.subheader {
color: white;
font-size: 18px;
font-family: 'Nasalization', sans-serif;
text-align: center;
}
</style>
"""
    st.markdown(page_css, unsafe_allow_html=True)
    st.markdown('<p class="title">News Aggregator</p>', unsafe_allow_html=True)
    st.markdown('<p class="subheader">A project by Abhas Jaiswal</p>', unsafe_allow_html=True)

    # Category name -> URL prefix; the scraper appends the page number to it.
    # NOTE(review): "Trending" has no 'page-' suffix, so with page number 1
    # appended the request goes to '.../trends1' - confirm this is intended.
    category_urls = {
        "India": 'https://www.ndtv.com/india/page-',
        "Latest": 'https://www.ndtv.com/latest/page-',
        "Cities": 'https://www.ndtv.com/cities/page-',
        "Education": 'https://www.ndtv.com/education/page-',
        "Trending": 'https://www.ndtv.com/trends',
        "Offbeat": 'https://www.ndtv.com/offbeat/page-',
        "South": 'https://www.ndtv.com/south/page-',
    }

    # The options are the dictionary keys, in the order they are listed above.
    category = st.selectbox("Select Category", list(category_urls))

    if category not in category_urls:
        st.error("Invalid category selected.")
        return

    # "Trending" is scraped as a single page; every other category uses 14.
    page_count = 1 if category == "Trending" else 14
    news_data = scrape_category(category_urls[category], 'h2', page_count)
    display_news(news_data)
# Build the page only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()