Skip to content

Commit

Permalink
Fixed google search empty result
Browse files Browse the repository at this point in the history
  • Loading branch information
HurinHu committed Jul 11, 2020
1 parent 2c132df commit f63134b
Show file tree
Hide file tree
Showing 10 changed files with 107 additions and 97 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
__pycache__/
GoogleNews.egg-info/
.DS_Store
/venv
.idea
*.html
._*
.coverage
14 changes: 14 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Travis CI build configuration.
language: python
# Python versions to build against (only 3.6 is listed).
python:
- "3.6"
# Install the package's parsing dependency (beautifulsoup4) plus the
# coverage tooling used by the script and after_success steps below.
install:
- pip install beautifulsoup4
- pip install coverage
- pip install coveralls
# Run unittest discovery under coverage: start directory 'test',
# file pattern 'test*.py'.
script:
- coverage run -m unittest discover 'test' 'test*.py'
# After a successful build, invoke the coveralls command
# (presumably uploads the coverage data collected above — confirm).
after_success:
  coveralls
75 changes: 0 additions & 75 deletions GoogleNews.egg-info/PKG-INFO

This file was deleted.

11 changes: 0 additions & 11 deletions GoogleNews.egg-info/SOURCES.txt

This file was deleted.

1 change: 0 additions & 1 deletion GoogleNews.egg-info/top_level.txt

This file was deleted.

15 changes: 7 additions & 8 deletions GoogleNews/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

from bs4 import BeautifulSoup as Soup


class GoogleNews:

def __init__(self,lang="en",period="",start="",end=""):
Expand Down Expand Up @@ -57,30 +56,30 @@ def getpage(self, page=1):
self.response = urllib.request.urlopen(self.req)
self.page = self.response.read()
self.content = Soup(self.page, "html.parser")
result = self.content.find_all("div", class_="g")
result = self.content.find_all("div", id="search")[0].find_all("g-card")
for item in result:
try:
tmp_text = item.find("h3").text
tmp_text = item.find("div", {"role" : "heading"}).text.replace("\n","")
except Exception:
tmp_text = ''
try:
tmp_link = item.find("h3").find("a").get("href")
tmp_link = item.find("a").get("href")
except Exception:
tmp_link = ''
try:
tmp_media = item.find("h3").findNext('div').find_all("span")[0].text
tmp_media = item.findAll("g-img")[1].parent.text
except Exception:
tmp_media = ''
try:
tmp_date = item.find("h3").findNext('div').find_all("span")[2].text
tmp_date = item.find("div", {"role" : "heading"}).next_sibling.findNext('div').findNext('div').text
except Exception:
tmp_date = ''
try:
tmp_desc = item.find("div", class_="st").text
tmp_desc = item.find("div", {"role" : "heading"}).next_sibling.findNext('div').text.replace("\n","")
except Exception:
tmp_desc = ''
try:
tmp_img = item.find("img").get("src")
tmp_img = item.findAll("g-img")[0].find("img").get("src")
except Exception:
tmp_img = ''
self.__texts.append(tmp_text)
Expand Down
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# GoogleNews

![Travis (.com)](https://img.shields.io/travis/com/HurinHu/GoogleNews)
![Coveralls github](https://img.shields.io/coveralls/github/HurinHu/GoogleNews)
![PyPI](https://img.shields.io/pypi/v/GoogleNews)
![PyPI - Downloads](https://img.shields.io/pypi/dm/GoogleNews)
![PyPI - Python Version](https://img.shields.io/pypi/pyversions/GoogleNews)
![PyPI - Wheel](https://img.shields.io/pypi/wheel/GoogleNews)
![GitHub contributors](https://img.shields.io/github/contributors/HurinHu/GoogleNews)
![GitHub issues](https://img.shields.io/github/issues-raw/HurinHu/GoogleNews)
![Upload Python Package](https://github.com/HurinHu/GoogleNews/workflows/Upload%20Python%20Package/badge.svg)
![GitHub](https://img.shields.io/github/license/HurinHu/GoogleNews)

## Install
```
pip install GoogleNews
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="GoogleNews",
version="1.3.8",
version="1.3.9",
author="Hurin Hu",
author_email="[email protected]",
description="Google News search for Python",
Expand All @@ -15,7 +15,7 @@
packages=setuptools.find_packages(),
install_requires=['beautifulsoup4'],
classifiers=[
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.6",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
],
Expand Down
Empty file added test/__init__.py
Empty file.
70 changes: 70 additions & 0 deletions test/testSearch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import unittest

from GoogleNews import GoogleNews

# Search term shared by every test case in this module.
keyword = 'Apple'

class NumbersTest(unittest.TestCase):
    """Checks how many results are held after searching for ``keyword``."""

    def testResultNumberWithDefaultPage(self):
        """A search with no extra page fetch must hold exactly 10 results."""
        client = GoogleNews()
        client.search(keyword)
        self.assertEqual(len(client.result()), 10)
        print('Result length with only one page is correct')

    def testResultNumberWithTwoPages(self):
        """A search followed by ``getpage(2)`` must hold exactly 20 results."""
        client = GoogleNews()
        client.search(keyword)
        client.getpage(2)
        self.assertEqual(len(client.result()), 20)
        print('Result length with two pages is correct')

class TestStringMethods(unittest.TestCase):
    """Checks the individual fields of the first result for ``keyword``."""

    def _first_result(self):
        """Run a fresh search for ``keyword`` and return its first result."""
        googlenews = GoogleNews()
        googlenews.search(keyword)
        return googlenews.result()[0]

    def testResultContainsKeyword(self):
        """The description must mention the keyword (case-insensitive)."""
        result = self._first_result()
        self.assertIn(keyword.lower(), result.get('desc').lower())
        print('Result contains keyword')

    def testResultHasLink(self):
        """The link field must contain 'http'."""
        result = self._first_result()
        self.assertIn('http', result.get('link').lower())
        print('Result contains http link')

    def testResultHasImage(self):
        """The image field must contain 'base64'."""
        result = self._first_result()
        self.assertIn('base64', result.get('img').lower())
        print('Result contains image')

    # The three checks below use a truthiness assertion rather than
    # assertIsNot('', value): assertIsNot compares object identity, so
    # whether it caught an empty field depended on string interning.
    # assertTrue fails for both '' and a missing (None) field.

    def testResultHasTitle(self):
        """The title field must be present and non-empty."""
        result = self._first_result()
        self.assertTrue(result.get('title'), 'Result title is empty')
        print('Result title is not empty')

    def testResultHasMedia(self):
        """The media field must be present and non-empty."""
        result = self._first_result()
        self.assertTrue(result.get('media'), 'Result media is empty')
        print('Result media is not empty')

    def testResultHasDate(self):
        """The date field must be present and non-empty."""
        result = self._first_result()
        self.assertTrue(result.get('date'), 'Result date is empty')
        print('Result date is not empty')


# Run the test cases when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit f63134b

Please sign in to comment.