From cc448fe743a1385fd835b3398f8c7a1dada57a9d Mon Sep 17 00:00:00 2001 From: Peter Thaleikis Date: Tue, 28 Nov 2023 20:18:22 +0100 Subject: [PATCH] Upgrading repo tools (#196) * Upgrade tooling * Update workflow * Drop PHP7.4 support * Round 1 of rector * Round 2 of rector * Tweak rules for current state * Upgrading PHP for tools * Drop pint as reg. requirement * Tweak composer requirements * 8.1? * update rector * tweak command * Less splash operators * checkout @ v4? * Drop PHP 8.0 --- .github/workflows/test.yaml | 45 ++++++++++---- composer.json | 26 ++++++--- rector.php | 25 ++++++++ src/DataTransferObjects/FeedEntry.php | 17 ++---- src/PHPScraper.php | 4 +- src/UsesBrowserKit.php | 6 +- src/UsesContent.php | 34 +++++------ src/UsesFeeds.php | 20 +++---- src/UsesFileParsers.php | 84 ++++++++++----------------- src/UsesUrls.php | 4 +- src/UsesXPathFilters.php | 7 --- 11 files changed, 142 insertions(+), 130 deletions(-) create mode 100644 rector.php diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index be855ea..737ae3a 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -1,4 +1,3 @@ -name: Tests on: [pull_request] jobs: @@ -7,10 +6,10 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - php-version: [8.0, 8.1, 8.2, 8.3] + php-version: [8.1, 8.2, 8.3] composer-flags: [null] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: shivammathur/setup-php@v2 with: php-version: ${{ matrix.php-version }} @@ -18,19 +17,19 @@ jobs: extensions: intl curl - run: php -v - run: composer update ${{ matrix.composer-flags }} --no-interaction --no-progress --prefer-dist --ansi - - run: ./vendor/bin/phpunit --color=always + - run: composer test:unit ## PHPSTAN phpstan: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install PHP - uses: shivammathur/setup-php@2.21.0 + uses: shivammathur/setup-php@v2 with: - php-version: '8.0' + php-version: '8.1' coverage: none env: COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -40,19 +39,41 @@ jobs: run: composer install --prefer-dist --no-interaction --no-progress --optimize-autoloader - name: PHPStan tests - run: composer phpstan + run: composer test:types + + ## RECTOR + rector: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install PHP + uses: shivammathur/setup-php@v2 + with: + php-version: '8.1' + coverage: none + env: + COMPOSER_TOKEN: ${{ secrets.GITHUB_TOKEN }} + update: true + + - name: Install dependencies + run: composer install --prefer-dist --no-interaction --no-progress --optimize-autoloader + + - name: PHPStan tests + run: composer test:refactor ## PINT pint: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install PHP - uses: shivammathur/setup-php@2.21.0 + uses: shivammathur/setup-php@v2 with: - php-version: '8.0' + php-version: '8.1' coverage: none tools: cs2pr env: @@ -63,4 +84,4 @@ jobs: run: composer install --prefer-dist --no-interaction --no-progress --optimize-autoloader - name: Run Pint - run: composer exec -- pint --test --format=checkstyle | cs2pr + run: ./vendor/bin/pint --test --format=checkstyle | cs2pr diff --git a/composer.json b/composer.json index 2f0285c..c1cda80 100644 --- a/composer.json +++ b/composer.json @@ -20,21 +20,23 @@ } ], "require": { - "php": "^8.0", + "php": "^8.1", "ext-intl": "*", "symfony/dom-crawler": "^5.4 || ^6.0", "donatello-za/rake-php-plus": "^1.0.15", "league/uri": "^6.0", "symfony/browser-kit": "^6.0", "symfony/http-client": "^6.0", - "symfony/css-selector": "^6.0", - "laravel/pint": "^1.5" + "symfony/css-selector": "^6.0" }, "require-dev": { "symfony/thanks": "^1.0.0", "phpunit/phpunit": "^8.0.0|^9.0.0", "illuminate/collections": "^8.0.0|^9.0.0", - "phpstan/phpstan": "^1.10" + "laravel/pint": "^1.0", + "phpstan/phpstan": "^1.0", + "rector/rector": "^0.18", + "symfony/var-dumper": "^6.0" }, "autoload": { "psr-4": { @@ -47,9 +49,18 @@ } }, "scripts": { - "test": "./vendor/phpunit/phpunit/phpunit --cache-result --cache-result-file=.tmp/phpunit --order-by=defects --colors=always --stop-on-failure", - "phpstan": "./vendor/bin/phpstan analyse", - "pint": "./vendor/bin/pint --verbose --test" + "refactor": "./vendor/bin/rector", + "lint": "./vendor/bin/pint", + "test:refactor": "./vendor/bin/rector --dry-run", + "test:lint": "./vendor/bin/pint --test", + "test:types": "./vendor/bin/phpstan analyse --ansi src/ tests/ --level=9", + "test:unit": "./vendor/phpunit/phpunit/phpunit --cache-result --cache-result-file=.tmp/phpunit --order-by=defects --colors=always --stop-on-failure", + "test": [ + "@test:refactor", + "@test:lint", + "@test:types", + "@test:unit" + ] }, "funding": [ { @@ -62,6 +73,7 @@ } ], "config": { + "sort-packages": true, "allow-plugins": { "symfony/thanks": true } diff --git a/rector.php b/rector.php new file mode 100644 index 0000000..77e1811 --- /dev/null +++ b/rector.php @@ -0,0 +1,25 @@ +paths([ + __DIR__.'/src', + ]); + + $rectorConfig->rules([ + InlineConstructorDefaultToPropertyRector::class, + ]); + + $rectorConfig->sets([ + // LevelSetList::UP_TO_PHP_82, + // SetList::CODE_QUALITY, + SetList::DEAD_CODE, + SetList::TYPE_DECLARATION, + ]); +}; diff --git a/src/DataTransferObjects/FeedEntry.php b/src/DataTransferObjects/FeedEntry.php index cf5ec74..123b11c 100644 --- a/src/DataTransferObjects/FeedEntry.php +++ b/src/DataTransferObjects/FeedEntry.php @@ -9,25 +9,16 @@ */ class FeedEntry { - // Support for PHP7.4 - public string $title; - - public string $description; - - public string $link; - /** * @todo with drop of PHP7.4 we should make these public and remove the initialization above. * @todo with drop of PHP7.4 and 8.0 we should make this `readonly`. */ public function __construct( - string $title, - string $description, - string $link + // Support for PHP7.4 + public string $title, + public string $description, + public string $link ) { - $this->title = $title; - $this->description = $description; - $this->link = $link; } /** diff --git a/src/PHPScraper.php b/src/PHPScraper.php index e2b772e..f8cdffe 100644 --- a/src/PHPScraper.php +++ b/src/PHPScraper.php @@ -25,10 +25,8 @@ class PHPScraper /** * Holds the Core class. It handles the actual scraping. - * - * @var \Spekulatius\PHPScraper\Core */ - protected $core = null; + protected Core $core; /** * @param PHPScraperConfig $config diff --git a/src/UsesBrowserKit.php b/src/UsesBrowserKit.php index 32e1b3d..0e19c19 100644 --- a/src/UsesBrowserKit.php +++ b/src/UsesBrowserKit.php @@ -13,21 +13,21 @@ trait UsesBrowserKit * * @var \Symfony\Component\BrowserKit\HttpBrowser */ - protected $client = null; + protected $client; /** * Holds the HttpClient * * @var \Symfony\Contracts\HttpClient\HttpClientInterface; */ - protected $httpClient = null; + protected $httpClient; /** * Holds the current page (a Crawler object) * * @var \Symfony\Component\DomCrawler\Crawler */ - protected $currentPage = null; + protected $currentPage; /** * Overwrites the client diff --git a/src/UsesContent.php b/src/UsesContent.php index a3c208e..a1a64bc 100644 --- a/src/UsesContent.php +++ b/src/UsesContent.php @@ -62,7 +62,7 @@ public function baseHref(): ?string /** * Get the header collected as an array * - * @return array + * @return array{charset: mixed, contentType: mixed, viewport: mixed, canonical: mixed, csrfToken: mixed} */ public function headers(): array { @@ -102,6 +102,8 @@ public function description(): ?string /** * Get the meta collected as an array + * + * @return array{author: mixed, image: mixed, keywords: mixed, description: mixed} */ public function metaTags(): array { @@ -221,9 +223,7 @@ public function lists(): array **/ public function orderedLists(): array { - return array_values(array_filter($this->lists(), function ($list) { - return $list['type'] === 'ol'; - })); + return array_values(array_filter($this->lists(), fn ($list): bool => $list['type'] === 'ol')); } /** @@ -231,9 +231,7 @@ public function orderedLists(): array **/ public function unorderedLists(): array { - return array_values(array_filter($this->lists(), function ($list) { - return $list['type'] === 'ul'; - })); + return array_values(array_filter($this->lists(), fn ($list): bool => $list['type'] === 'ul')); } /** @@ -254,9 +252,7 @@ public function cleanParagraphs(): array { return array_values(array_filter( $this->paragraphs(), - function ($paragraph) { - return $paragraph !== ''; - } + fn ($paragraph): bool => $paragraph !== '' )); } @@ -287,7 +283,7 @@ public function outlineWithParagraphs(): array foreach ($result as $index => $array) { $result[$index] = array_combine(['tag', 'content'], (array) $array); - $result[$index]['content'] = trim($result[$index]['content']); + $result[$index]['content'] = trim((string) $result[$index]['content']); } return $result; @@ -451,7 +447,7 @@ public function internalLinks(): array // Filter the array return array_values(array_filter( $this->links(), - function ($link) use (&$currentRootDomain) { + function ($link) use (&$currentRootDomain): bool { $linkRootDomain = Uri::createFromString($link)->getHost(); return $currentRootDomain === $linkRootDomain; @@ -502,18 +498,18 @@ public function linksWithDetails(): array // Prepare the result set. $entry = [ 'url' => $uri, - 'protocol' => \strpos($uri, ':') !== false ? explode(':', $uri)[0] : null, + 'protocol' => str_contains($uri, ':') ? explode(':', $uri)[0] : null, 'text' => trim($link->nodeValue ?? ''), 'title' => $link->getAttribute('title') === '' ? null : $link->getAttribute('title'), 'target' => $link->getAttribute('target') === '' ? null : $link->getAttribute('target'), 'rel' => ($rel === '') ? null : strtolower($rel), 'image' => $image, - 'isNofollow' => ($rel === '') ? false : (\strpos($rel, 'nofollow') !== false), - 'isUGC' => ($rel === '') ? false : (\strpos($rel, 'ugc') !== false), - 'isSponsored' => ($rel === '') ? false : (\strpos($rel, 'sponsored') !== false), - 'isMe' => ($rel === '') ? false : (\strpos($rel, 'me') !== false), - 'isNoopener' => ($rel === '') ? false : (\strpos($rel, 'noopener') !== false), - 'isNoreferrer' => ($rel === '') ? false : (\strpos($rel, 'noreferrer') !== false), + 'isNofollow' => ($rel === '') ? false : str_contains($rel, 'nofollow'), + 'isUGC' => ($rel === '') ? false : str_contains($rel, 'ugc'), + 'isSponsored' => ($rel === '') ? false : str_contains($rel, 'sponsored'), + 'isMe' => ($rel === '') ? false : str_contains($rel, 'me'), + 'isNoopener' => ($rel === '') ? false : str_contains($rel, 'noopener'), + 'isNoreferrer' => ($rel === '') ? false : str_contains($rel, 'noreferrer'), ]; $result[] = $entry; diff --git a/src/UsesFeeds.php b/src/UsesFeeds.php index 7a0b38f..c80fa29 100644 --- a/src/UsesFeeds.php +++ b/src/UsesFeeds.php @@ -19,7 +19,7 @@ public function sitemapUrl(): string * * @return array $sitemap */ - public function sitemapRaw(?string $url = null): array + public function sitemapRaw(string $url = null): array { return $this->parseXml($this->fetchAsset($url ?? $this->sitemapUrl())); } @@ -31,11 +31,11 @@ public function sitemapRaw(?string $url = null): array * * @return array $sitemap */ - public function sitemap(?string $url = null): array + public function sitemap(string $url = null): array { return array_map( // Create the generic DTO for each - fn ($entry) => FeedEntry::fromArray([ + fn ($entry): FeedEntry => FeedEntry::fromArray([ 'title' => '', 'description' => '', 'link' => $entry['loc'], @@ -59,7 +59,7 @@ public function searchIndexUrl(): string * * @return array $searchIndex */ - public function searchIndexRaw(?string $url = null): array + public function searchIndexRaw(string $url = null): array { return $this->parseJson($this->fetchAsset($url ?? $this->searchIndexUrl())); } @@ -69,11 +69,11 @@ public function searchIndexRaw(?string $url = null): array * * @return array $searchIndex */ - public function searchIndex(?string $url = null): array + public function searchIndex(string $url = null): array { return array_map( // Create the generic DTO for each - fn ($entry) => FeedEntry::fromArray([ + fn ($entry): FeedEntry => FeedEntry::fromArray([ 'title' => $entry['title'], 'description' => $entry['snippet'], 'link' => $entry['link'], @@ -93,34 +93,32 @@ public function rssUrls(): array { $urls = $this->filterExtractAttributes('//link[@type="application/rss+xml"]', ['href']); - return array_map(fn ($url) => (string) $this->makeUrlAbsolute($url), $urls); + return array_map(fn ($url): string => (string) $this->makeUrlAbsolute($url), $urls); } /** * Fetches a given set of RSS feeds and returns one array with raw data. * - * @param ?string ...$urls * @return array $rss */ public function rssRaw(?string ...$urls): array { return array_map( fn ($url) => $this->parseXml($this->fetchAsset((string) $url)), - empty($urls) ? $this->rssUrls() : $urls + $urls === [] ? $this->rssUrls() : $urls ); } /** * Fetches a given set of RSS feeds and returns one array with raw data. * - * @param ?string ...$urls * @return array $rss */ public function rss(?string ...$urls): array { return array_map( // Create the generic DTO for each - fn ($entry) => FeedEntry::fromArray([ + fn ($entry): FeedEntry => FeedEntry::fromArray([ 'title' => $entry['title'], 'link' => $entry['link']['@attributes']['href'], ]), diff --git a/src/UsesFileParsers.php b/src/UsesFileParsers.php index 0068c1b..d2ff7cf 100644 --- a/src/UsesFileParsers.php +++ b/src/UsesFileParsers.php @@ -7,16 +7,13 @@ trait UsesFileParsers /** * Base Util to decode a CSV string. * - * @param ?string $separator - * @param ?string $enclosure - * @param ?string $escape * @return array $data */ public function csvDecodeRaw( string $csvString, - ?string $separator = null, - ?string $enclosure = null, - ?string $escape = null + string $separator = null, + string $enclosure = null, + string $escape = null ): array { try { $csv = array_map( @@ -38,23 +35,20 @@ public function csvDecodeRaw( /** * Decode CSV and cast types. * - * @param ?string $separator - * @param ?string $enclosure - * @param ?string $escape * @return array $data */ public function csvDecode( string $csvString, - ?string $separator = null, - ?string $enclosure = null, - ?string $escape = null + string $separator = null, + string $enclosure = null, + string $escape = null ): array { try { $csv = $this->csvDecodeRaw($csvString, $separator, $enclosure, $escape); // Cast native and custom types $csv = array_map( - fn ($line) => array_map( + fn ($line): array => array_map( fn ($cell) => $this->castType($cell), $line ), @@ -70,16 +64,13 @@ public function csvDecode( /** * Util to decode a CSV string to asso. array. * - * @param ?string $separator - * @param ?string $enclosure - * @param ?string $escape * @return array $data */ public function csvDecodeWithHeaderRaw( string $csvString, - ?string $separator = null, - ?string $enclosure = null, - ?string $escape = null + string $separator = null, + string $enclosure = null, + string $escape = null ): array { try { $csv = $this->csvDecodeRaw($csvString, $separator, $enclosure, $escape); @@ -89,7 +80,7 @@ public function csvDecodeWithHeaderRaw( // Combine the rows with the header entry. array_walk( $csv, - function (&$row, $key, $header) { + function (&$row, $key, $header): void { $row = array_combine($header, $row); }, $header @@ -104,16 +95,13 @@ function (&$row, $key, $header) { /** * Decode a CSV string to asso. array and cast types. * - * @param ?string $separator - * @param ?string $enclosure - * @param ?string $escape * @return array $data */ public function csvDecodeWithHeader( string $csvString, - ?string $separator = null, - ?string $enclosure = null, - ?string $escape = null + string $separator = null, + string $enclosure = null, + string $escape = null ): array { try { $csv = $this->csvDecodeWithHeaderRaw($csvString, $separator, $enclosure, $escape); @@ -133,18 +121,16 @@ public function csvDecodeWithHeader( /** * Helper method to cast types - * - * @return int|float|string */ - public function castType(string $entry) + public function castType(string $entry): int|float|string { // Looks like an int? - if ($entry == (string) (int) $entry) { + if ($entry == (int) $entry) { return (int) $entry; } // Looks like a float? - if ($entry == (string) (float) $entry) { + if ($entry == (float) $entry) { return (float) $entry; } @@ -154,17 +140,13 @@ public function castType(string $entry) /** * Parses a given CSV string or fetches the URL and parses it. * - * @param ?string $csvStringOrUrl - * @param ?string $separator - * @param ?string $enclosure - * @param ?string $escape * @return array $data */ public function parseCsv( - ?string $csvStringOrUrl = null, - ?string $separator = null, - ?string $enclosure = null, - ?string $escape = null + string $csvStringOrUrl = null, + string $separator = null, + string $enclosure = null, + string $escape = null ): array { // Check if we got either a current page or at least a URL string to process if ($csvStringOrUrl === null && $this->currentPage === null) { @@ -210,17 +192,13 @@ public function parseCsv( /** * Parses a given CSV string into an asso. with headers or fetches the URL and parses it. * - * @param ?string $csvStringOrUrl - * @param ?string $separator - * @param ?string $enclosure - * @param ?string $escape * @return array $data */ public function parseCsvWithHeader( - ?string $csvStringOrUrl = null, - ?string $separator = null, - ?string $enclosure = null, - ?string $escape = null + string $csvStringOrUrl = null, + string $separator = null, + string $enclosure = null, + string $escape = null ): array { // Check if we got either a current page or at least a URL string to process if ($csvStringOrUrl === null && $this->currentPage === null) { @@ -266,10 +244,9 @@ public function parseCsvWithHeader( /** * Parses a given JSON string or fetches the URL and parses it. * - * @param ?string $jsonStringOrUrl * @return array $data */ - public function parseJson(?string $jsonStringOrUrl = null): array + public function parseJson(string $jsonStringOrUrl = null): array { // Check if we got either a current page or at least a URL string to process if ($jsonStringOrUrl === null && $this->currentPage === null) { @@ -301,7 +278,9 @@ public function parseJson(?string $jsonStringOrUrl = null): array // Fallback on the current URL, if needed and possible (`go` was used before). $jsonStringOrUrl ?? $this->currentUrl() ), - true + true, + 512, + JSON_THROW_ON_ERROR ); } catch (\Exception $e) { throw new \Exception('Failed to parse JSON: ' . $e->getMessage()); @@ -313,10 +292,9 @@ public function parseJson(?string $jsonStringOrUrl = null): array /** * Parses a given XML string or fetches the URL and parses it. * - * @param ?string $xmlStringOrUrl * @return array $data */ - public function parseXml(?string $xmlStringOrUrl = null): array + public function parseXml(string $xmlStringOrUrl = null): array { // Check if we got either a current page or at least a URL string to process if ($xmlStringOrUrl === null && $this->currentPage === null) { @@ -357,6 +335,6 @@ protected function xmlDecode(string $xmlString): array $xml = simplexml_load_string(trim($xmlString), 'SimpleXMLElement', LIBXML_NOCDATA); // Convert XML to JSON and then to an associative array - return (array) json_decode((string) json_encode($xml), true); + return (array) json_decode(json_encode($xml, JSON_THROW_ON_ERROR), true, 512, JSON_THROW_ON_ERROR); } } diff --git a/src/UsesUrls.php b/src/UsesUrls.php index 1866483..dceae2d 100644 --- a/src/UsesUrls.php +++ b/src/UsesUrls.php @@ -28,7 +28,7 @@ public function currentUrl(): string /** * Returns the current host * - * @return string $host + * @return string|null $host */ public function currentHost(): ?string { @@ -52,7 +52,7 @@ public function currentBaseHost(): string * * @return ?string $absoluteUrl */ - public function makeUrlAbsolute(?string $url = null, string $baseUrl = null): ?string + public function makeUrlAbsolute(string $url = null, string $baseUrl = null): ?string { // Allow to pass null through if ($url === null || $this->currentPage === null) { diff --git a/src/UsesXPathFilters.php b/src/UsesXPathFilters.php index d8798a3..703f416 100644 --- a/src/UsesXPathFilters.php +++ b/src/UsesXPathFilters.php @@ -16,8 +16,6 @@ public function filter(string $query): Crawler /** * Filters the current page by a xPath-query and returns the first one, or null. - * - * @return ?Crawler */ public function filterFirst(string $query): ?Crawler { @@ -28,8 +26,6 @@ public function filterFirst(string $query): ?Crawler /** * Filters the current page by a xPath-query and returns the first ones content, or null. - * - * @return ?string */ public function filterFirstText(string $query): ?string { @@ -65,7 +61,6 @@ public function filterExtractAttributes(string $query, array $attributes): array * Filters the current page by a xPath-query and returns the selected attributes of the first match. * * @param array $attributes - * @return ?string */ public function filterFirstExtractAttribute(string $query, array $attributes): ?string { @@ -76,8 +71,6 @@ public function filterFirstExtractAttribute(string $query, array $attributes): ? /** * Returns the content attribute for the first result of the query, or null. - * - * @return ?string */ public function filterFirstContent(string $query): ?string {