Skip to content

Commit

Permalink
fixed get_news() start/end date and image issue
Browse files Browse the repository at this point in the history
  • Loading branch information
HurinHu committed Mar 24, 2024
1 parent f9f81e4 commit 3a9e2f3
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 6 deletions.
14 changes: 10 additions & 4 deletions GoogleNews/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def __init__(self,lang="en",period="",start="",end="",encode="utf-8",region=None
self.__end = end
self.__encode = encode
self.__exception = False
self.__version = '1.6.13'
self.__version = '1.6.14'

def getVersion(self):
    """
    Returns the version string held in this instance's private __version attribute.
    """
    current_version = self.__version
    return current_version
Expand Down Expand Up @@ -146,6 +146,12 @@ def build_response(self):
result = self.content.find_all("a",attrs={'data-ved': True})
return result

def remove_after_last_fullstop(self, s):
    """
    Cuts off whatever follows the final '.' in s.
    The full stop itself is kept; when s contains no '.', s is returned unchanged.
    """
    # rpartition splits around the right-most '.', giving an empty separator
    # when the string has no full stop at all.
    head, stop, _trailing = s.rpartition('.')
    if not stop:
        return s
    return head + stop

def page_at(self, page=1):
"""
Retrieves a specific page from google.com in the news sections into __results.
Expand Down Expand Up @@ -184,7 +190,7 @@ def page_at(self, page=1):
tmp_date = ''
tmp_datetime=None
try:
tmp_desc = item.find_next_sibling('div').find('div').find_next_sibling('div').find('div').find('div').find('div').contents[0].replace('\n','')
tmp_desc = self.remove_after_last_fullstop(item.find('div').find_next_sibling('div').find('div').find_next_sibling('div').find('div').find('div').find('div').text).replace('\n','')
except Exception:
tmp_desc = ''
try:
Expand Down Expand Up @@ -240,7 +246,7 @@ def get_page(self, page=1):
tmp_date = ''
tmp_datetime=None
try:
tmp_desc = item.find_next_sibling('div').find('div').find_next_sibling('div').find('div').find('div').find('div').contents[0].replace('\n','')
tmp_desc = self.remove_after_last_fullstop(item.find('div').find_next_sibling('div').find('div').find_next_sibling('div').find('div').find('div').find('div').text).replace('\n','')
except Exception:
tmp_desc = ''
try:
Expand Down Expand Up @@ -329,7 +335,7 @@ def get_news(self, key="",deamplify=False):
desc = 'video'
# image
try:
img = article.find("figure").find("img").get("src")
img = 'news.google.com'+article.find("figure").find("img").get("src")
except:
img = None
# site
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="GoogleNews",
version="1.6.13",
version="1.6.14",
author="Hurin Hu",
author_email="[email protected]",
description="Google News search for Python",
Expand Down
2 changes: 1 addition & 1 deletion test/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class TestStringMethods(unittest.TestCase):

def testVersion(self):
googlenews = GoogleNews()
version = '1.6.13'
version = '1.6.14'
self.assertIn(version, googlenews.getVersion())
print('Latest version matched')

Expand Down

0 comments on commit 3a9e2f3

Please sign in to comment.