Skip to content

Commit

Permalink
Merge pull request #7 from seotracker/mickaelandrieu-patch-1
Browse files Browse the repository at this point in the history
Library update (Hello 2017! Google has updated its search engine)
  • Loading branch information
mickaelandrieu authored Nov 3, 2017
2 parents 86d97d6 + bd34553 commit a41a594
Show file tree
Hide file tree
Showing 7 changed files with 381 additions and 116 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ php:
- 7.0
- 7.1

install: composer install --prefer-dist --no-interaction
install: composer install --no-interaction

script:
- phpunit
- ./vendor/bin/phpunit
2 changes: 1 addition & 1 deletion Adapter/Crawler/SymfonyCrawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public function isOk()
*/
public function get($cssSelector)
{
if (class_exists('Symfony\\Component\\CssSelector\\CssSelector')) {
if (class_exists('Symfony\\Component\\CssSelector\\CssSelectorConverter')) {
return $this->filter($cssSelector);
}

Expand Down
8 changes: 5 additions & 3 deletions Model/GoogleEngine.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ class GoogleEngine implements SearchEngineInterface
private $name;
private $scrapper;

const LINK_SELECTOR = "#res div.g h3 > a";

public function __construct(ScrapperInterface $scrapper, CrawlerInterface $crawler, $name = null, $locale = null)
{
$this->crawler = $crawler;
Expand Down Expand Up @@ -74,7 +76,7 @@ public function getWebsites($needle, $limit)

$content = $this->scrapper->get($url);
$crawler = $this->crawler->setContent($content);
$links = $crawler->get('#res li.g > h3 > a');
$links = $crawler->get(self::LINK_SELECTOR);

$websites = new WebsiteCollection();

Expand All @@ -95,7 +97,7 @@ public function getPosition($needle, WebsiteInterface $website)
$url = $this->getRootUrl()."q=$needle&gbv=1&num=200";

$crawler = $this->crawler->setContent($this->scrapper->get($url));
$links = $crawler->get('#res li.g > h3 > a');
$links = $crawler->get(self::LINK_SELECTOR);

foreach ($links as $position => $link) {
$fullLocation = $link->getAttribute('href');
Expand All @@ -118,7 +120,7 @@ public function getBacklinks(WebsiteInterface $website)
$url = $this->getRootUrl().'q=link:"'. $websiteLocation . '"-site:'. $websiteLocation .'&num=100';

$crawler = $this->crawler->setContent($this->scrapper->get($url));
$links = $crawler->get('#res li.g > h3 > a');
$links = $crawler->get(self::LINK_SELECTOR);

foreach ($links as $position => $link) {
$fullLocation = $link->getAttribute('href');
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ This is a common way to deal with websites, search engines and scrappers.
[![SensioLabsInsight](https://insight.sensiolabs.com/projects/2c440481-3f27-4b15-a635-e7d701ac1ae4/small.png)](https://insight.sensiolabs.com/projects/2c440481-3f27-4b15-a635-e7d701ac1ae4)


1) Websites
## Websites
-----------

Website is an object representation of a real website.
Expand All @@ -23,7 +23,7 @@ A website object can return useful data for SEO like:

See ``WebsiteInterface`` for more information about it.

2) SearchEngines
## SearchEngines
----------------

SearchEngine is an object representation of a real search engine.
Expand All @@ -37,7 +37,7 @@ A search engine can return useful data for SEO like:

See ``SearchEngineInterface`` for more information about it.

3) Scrappers
## Scrappers
------------

A scrapper is an object used to fetch HTML from the Internet.
Expand All @@ -47,7 +47,7 @@ which accept at least 1 argument: an url location.

See ``ScrapperInterface`` and implementations in ``Adapter\Scrapper`` folder

4) Crawlers
## Crawlers
-----------

A crawler is an object used to query and manipulate HTML DOM.
Expand All @@ -57,7 +57,7 @@ Seo-core offers an interface and its Symfony2-Component based implementation.
See ``CrawlerInterface`` and implementation in ``Adapter\Crawler`` folder.


5) Example
## Example
----------

```php
Expand Down
456 changes: 358 additions & 98 deletions Tests/Fixtures/google.html

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions Tests/Functional/FunctionalTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public function testGoogleSearchReturnWebsiteCollection()

public function testGetPositionOnWebsiteReturnInteger()
{
$location = 'http://www.google.fr';
$location = 'https://www.google.fr';
$content = file_get_contents('./Tests/Fixtures/google.html');
$website = new Website($this->crawler, $content, $location);
$googleEngine = new GoogleFranceEngine($this->scrapper, $this->crawler);
Expand All @@ -72,11 +72,11 @@ public function testGetPositionOnWebsiteReturnInteger()

public function testGetBacklinksReturnArrayOfLinks()
{
$location = 'http://www.google.fr';
$location = 'https://www.google.fr';
$content = file_get_contents('./Tests/Fixtures/google.html');
$googleEngine = new GoogleFranceEngine($this->scrapper, $this->crawler);
$website = new Website($this->crawler, $content, $location);

$this->assertEquals(100, count($googleEngine->getBacklinks($website)));
$this->assertGreaterThan(30, count($googleEngine->getBacklinks($website)));
}
}
11 changes: 7 additions & 4 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@
"authors": [
{
"name": "Mickaël Andrieu",
"email": "mickael.andrieu@hotmail.fr"
"email": "mickael.andrieu@solvolabs.com"
}
],
"require": {
"php": ">=5.4.4",
"php": ">=5.6",
"linclark/microdata-php": "2.0.2",
"symfony/css-selector": "^2.3|^3.0",
"symfony/dom-crawler": "^2.3|^3.0"
"symfony/css-selector": "~3.3",
"symfony/dom-crawler": "~3.3"
},
"require-dev": {
"phpunit/phpunit": "~4.8"
},
"suggest": {
"ext-curl": "For usage of Curl adapter"
Expand Down

0 comments on commit a41a594

Please sign in to comment.