Skip to content

Commit

Permalink
Add logger to the store
Browse files Browse the repository at this point in the history
Automatically add the crawler's logger to the `Store` so you can also
log messages from there. This can be breaking as the `StoreInterface`
now also requires the `addLogger` method. The new abstract `Store` class
already implements it, so you can just extend it.
Also prepare for v0.4 release tag.
  • Loading branch information
otsch committed May 6, 2022
1 parent 26cb3ff commit 502f3d6
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 9 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

## [0.4.0] - 2022-05-06
### Added
* The `BaseStep` class now has `where()` and `orWhere()` methods to
filter step outputs. You can set multiple filters that will be
Expand All @@ -19,6 +21,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
`onSameDomain()`, `notOnSameDomain()`, `onDomain()`,
`onSameHost()`, `notOnSameHost()`, `onHost()` to restrict
which links to find.
* Automatically add the crawler's logger to the `Store` so you can
also log messages from there. This can be breaking as the
`StoreInterface` now also requires the `addLogger` method. The
new abstract `Store` class already implements it, so you can just
extend it.

### Changed
* The `Csv` step can now also be used without defining a column
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

This package provides a kind of framework and a lot of
ready-to-use, so-called steps that you can combine to build your
own crawlers or scrapers with.
own crawlers and scrapers with.

## Documentation

You can find the documentation at
[crwlr.software](https://www.crwlr.software/packages/crawler/v0.2/getting-started).
[crwlr.software](https://www.crwlr.software/packages/crawler/v0.4/getting-started).

## Contributing

Expand Down
2 changes: 2 additions & 0 deletions src/Crawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ public function getLoader(): LoaderInterface

public function setStore(StoreInterface $store): static
{
$store->addLogger($this->logger);

$this->store = $store;

return $this;
Expand Down
6 changes: 4 additions & 2 deletions src/Stores/SimpleCsvFileStore.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
use Crwlr\Crawler\Result;
use Exception;

class SimpleCsvFileStore implements StoreInterface
class SimpleCsvFileStore extends Store
{
private int $createTimestamp;

private bool $isFirstResult = true;

public function __construct(private string $storePath, private ?string $filePrefix = null)
public function __construct(private readonly string $storePath, private readonly ?string $filePrefix = null)
{
$this->createTimestamp = time();

Expand All @@ -35,6 +35,8 @@ public function store(Result $result): void
fputcsv($fileHandle, array_values($result->toArray()));

fclose($fileHandle);

$this->logger?->info('Stored a result');
}

public function filePath(): string
Expand Down
17 changes: 17 additions & 0 deletions src/Stores/Store.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?php

namespace Crwlr\Crawler\Stores;

use Psr\Log\LoggerInterface;

/**
 * Base class for stores that takes care of keeping a logger.
 *
 * It implements only the addLogger() part of StoreInterface; store()
 * is left to the concrete subclass.
 */
abstract class Store implements StoreInterface
{
    /**
     * Logger available to subclasses. It is null until addLogger() has
     * been called, so use the nullsafe operator when logging.
     */
    protected ?LoggerInterface $logger = null;

    /**
     * Remember the given logger on this store.
     *
     * @param LoggerInterface $logger the logger to keep for later log calls
     * @return static the same store instance, to allow chaining
     */
    public function addLogger(LoggerInterface $logger): static
    {
        $this->logger = $logger;

        return $this;
    }
}
3 changes: 3 additions & 0 deletions src/Stores/StoreInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
namespace Crwlr\Crawler\Stores;

use Crwlr\Crawler\Result;
use Psr\Log\LoggerInterface;

/**
 * Contract for stores that the crawler hands its results to.
 */
interface StoreInterface
{
/**
 * Store a single result.
 *
 * @param Result $result the result to store
 */
public function store(Result $result): void;

/**
 * Hand a logger to the store so it can write log messages itself.
 *
 * Crawler::setStore() calls this with the crawler's own logger.
 *
 * @param LoggerInterface $logger the logger the store may use
 * @return static declared as static; the abstract Store implementation returns $this
 */
public function addLogger(LoggerInterface $logger): static;
}
31 changes: 26 additions & 5 deletions tests/CrawlerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
use Crwlr\Crawler\Steps\Loading\LoadingStepInterface;
use Crwlr\Crawler\Steps\Step;
use Crwlr\Crawler\Steps\StepInterface;
use Crwlr\Crawler\Stores\Store;
use Crwlr\Crawler\Stores\StoreInterface;
use Crwlr\Crawler\UserAgents\BotUserAgent;
use Crwlr\Crawler\UserAgents\UserAgentInterface;
Expand Down Expand Up @@ -384,6 +385,8 @@ protected function loader(UserAgentInterface $userAgent, LoggerInterface $logger
it('sends all results to the Store when there is one and still yields the results', function () {
$store = Mockery::mock(StoreInterface::class);

$store->shouldReceive('addLogger');

$store->shouldReceive('store')->times(3);

$crawler = helper_getDummyCrawler();
Expand Down Expand Up @@ -415,6 +418,8 @@ function () {

$store = Mockery::mock(StoreInterface::class);

$store->shouldReceive('addLogger');

$store->shouldNotReceive('store');

$crawler = helper_getDummyCrawler()
Expand All @@ -426,7 +431,7 @@ function () {

$store = Mockery::mock(StoreInterface::class);

$store->shouldReceive('store')->once();
$store->shouldReceive('store', 'addLogger')->once();

$crawler = helper_getDummyCrawler()
->addStep($step)
Expand Down Expand Up @@ -472,10 +477,18 @@ protected function invoke(mixed $input): Generator
}
};

$store = new class () extends Store {
public function store(Result $result): void
{
$this->logger?->info('Stored a result');
}
};

$crawler = helper_getDummyCrawler()
->inputs(['input1', 'input2'])
->addStep('foo', $step1)
->addStep('bar', $step2);
->addStep('bar', $step2)
->setStore($store);

$crawler->runAndTraverse();

Expand All @@ -489,11 +502,19 @@ protected function invoke(mixed $input): Generator

expect($outputLines[2])->toContain('step2 called');

expect($outputLines[3])->toContain('step1 called');
expect($outputLines[3])->toContain('Stored a result');

expect($outputLines[4])->toContain('Stored a result');

expect($outputLines[5])->toContain('step1 called');

expect($outputLines[6])->toContain('step2 called');

expect($outputLines[7])->toContain('step2 called');

expect($outputLines[4])->toContain('step2 called');
expect($outputLines[8])->toContain('Stored a result');

expect($outputLines[5])->toContain('step2 called');
expect($outputLines[9])->toContain('Stored a result');
}
);

Expand Down

0 comments on commit 502f3d6

Please sign in to comment.