Skip to content

Commit

Permalink
URL check tool is time limited
Browse files Browse the repository at this point in the history
+ unchecked or outdated external links cause the page to be re-rendered
+ URL checker uses a time limit: 3s for page/read, 8s for page/render and 12s for multi-render if the checkbox is ticked
+ Serviceurlchecker is instantiated once for multi-render
+ URL cache expires after 90 days
+ unchecked URLs are listed in home view
  • Loading branch information
vincent-peugnet committed Nov 25, 2024
1 parent d3c5cf2 commit d6b8fff
Show file tree
Hide file tree
Showing 12 changed files with 126 additions and 46 deletions.
5 changes: 3 additions & 2 deletions app/class/Controllerhome.php
Original file line number Diff line number Diff line change
Expand Up @@ -327,13 +327,14 @@ public function multiedit()
public function multirender(): void
{
$pagelist = $_POST['pagesid'] ?? [];
$checkurl = $_POST['checkurl'] ?? false;
$checkurl = boolval($_POST['checkurl']);
$total = count($pagelist);
$pagelist = $this->pagemanager->pagelistbyid($pagelist);
$count = 0;
$urlchecker = $checkurl ? new Serviceurlchecker(12) : null; // time to check URLs is limited to 12s
foreach ($pagelist as $page) {
try {
$page = $this->pagemanager->renderpage($page, $this->router, $checkurl);
$page = $this->pagemanager->renderpage($page, $this->router, $urlchecker);
if ($this->pagemanager->update($page)) {
$count++;
}
Expand Down
8 changes: 4 additions & 4 deletions app/class/Controllerpage.php
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ public function render(string $page): void

if ($this->importpage() && $this->user->iseditor()) {
try {
$this->page = $this->pagemanager->renderpage($this->page, $this->router, true);
$this->page = $this->pagemanager->renderpage($this->page, $this->router, new Serviceurlchecker(8));
} catch (RuntimeException $e) {
Logger::errorex($e);
}
Expand Down Expand Up @@ -116,7 +116,7 @@ private function templaterender(Page $page): void
$templates = $this->pagemanager->getpagecsstemplates($page);
foreach ($templates as $page) {
if ($this->pagemanager->needtoberendered($page)) {
$page = $this->pagemanager->renderpage($page, $this->router, false);
$page = $this->pagemanager->renderpage($page, $this->router, null);
$this->pagemanager->update($page);
}
}
Expand All @@ -127,7 +127,7 @@ private function templaterender(Page $page): void
try {
$templatejs = $this->pagemanager->get($page->templatejavascript());
if ($this->pagemanager->needtoberendered($templatejs)) {
$templatejs = $this->pagemanager->renderpage($templatejs, $this->router, false);
$templatejs = $this->pagemanager->renderpage($templatejs, $this->router, null);
$this->pagemanager->update($templatejs);
}
} catch (RuntimeException $e) {
Expand Down Expand Up @@ -186,7 +186,7 @@ public function read(string $page): void
$oldlinkto = $this->page->linkto();
}
try {
$this->page = $this->pagemanager->renderpage($this->page, $this->router, true);
$this->page = $this->pagemanager->renderpage($this->page, $this->router, new Serviceurlchecker(3));
} catch (RuntimeException $e) {
Logger::errorex($e);
}
Expand Down
36 changes: 26 additions & 10 deletions app/class/Modelpage.php
Original file line number Diff line number Diff line change
Expand Up @@ -403,22 +403,38 @@ protected function reset(Page $page, array $reset): Page
*
* 1. This will compare edit and render dates
* 2. then if render file exists
* 3. then if the templatebody is set and has been updated
* 3. then if the page has external links and
* - if some haven't been checked yet
* - or if it's been a long time
* - or if url cache is deleted
* 4. then if the templatebody is set and has been updated
*
* @param Page $page Page to be checked
*
* @return bool true if the page need to be rendered otherwise false
*/
public function needtoberendered(Page $page): bool
{
if ($page->daterender() <= $page->datemodif()) {
return true;
}
if (
$page->daterender() <= $page->datemodif() ||
!file_exists(self::HTML_RENDER_DIR . $page->id() . '.html') ||
!file_exists(self::ASSETS_RENDER_DIR . $page->id() . '.css') ||
!file_exists(self::ASSETS_RENDER_DIR . $page->id() . '.js')
!file_exists(self::HTML_RENDER_DIR . $page->id() . '.html')
|| !file_exists(self::ASSETS_RENDER_DIR . $page->id() . '.css')
|| !file_exists(self::ASSETS_RENDER_DIR . $page->id() . '.js')
) {
return true;
} elseif (!empty($page->templatebody())) {
}
if (count($page->externallinks()) > 0) {
$now = new DateTimeImmutable("now", timezone_open("Europe/Paris"));
if (
$page->daterender()->diff($now)->days > Serviceurlchecker::CACHE_EXPIRE_TIME
|| $page->uncheckedlinkcount() > 0
) {
return true;
}
}
if (!empty($page->templatebody())) {
try {
$bodytemplate = $this->get($page->templatebody());
return $page->daterender() <= $bodytemplate->datemodif();
Expand All @@ -437,17 +453,17 @@ public function needtoberendered(Page $page): bool
*
* @param Page $page
*
* @param bool $checkurl If true, URLs of rendered page will be checked
* @param ?Serviceurlchecker $urlchecker
*
* @return Page rendered $page
*
* @throws Runtimeexception if writing files to filesystem failed
*/
public function renderpage(Page $page, AltoRouter $router, bool $checkurl = false): Page
public function renderpage(Page $page, AltoRouter $router, ?Serviceurlchecker $urlchecker = null): Page
{
$now = new DateTimeImmutable("now", timezone_open("Europe/Paris"));

$params = [$router, $this, Config::externallinkblank(), Config::internallinkblank()];
$params = [$router, $this, Config::externallinkblank(), Config::internallinkblank(), $urlchecker];

switch ($page->version()) {
case Page::V1:
Expand All @@ -460,7 +476,7 @@ public function renderpage(Page $page, AltoRouter $router, bool $checkurl = fals
throw new DomainException('Page version is out of range');
}

$html = $renderengine->render($page, $checkurl);
$html = $renderengine->render($page);

Fs::dircheck(Model::ASSETS_RENDER_DIR, true, 0775);
Fs::dircheck(Model::HTML_RENDER_DIR, true, 0775);
Expand Down
18 changes: 15 additions & 3 deletions app/class/Page.php
Original file line number Diff line number Diff line change
Expand Up @@ -806,8 +806,8 @@ public function addtag($tag)

public function deadlinkcount(): int
{
$deadurls = array_filter($this->externallinks, function ($ok): bool {
return !$ok;
$deadurls = array_filter($this->externallinks, function ($status): bool {
return $status === false;
});
return count($deadurls);
}
Expand All @@ -821,12 +821,24 @@ public function externallinkstitle(): string
{
$links = $this->externallinks;
array_walk($links, function (&$value, string $key) {
$symbol = $value ? '' : '💀';
if (is_null($value)) {
$symbol = '🔍️';
} else {
$symbol = $value ? '' : '💀';
}
$value = $key . ' ' . $symbol;
});
return implode("\n", $links);
}

/**
 * Count the external links that have no known status.
 *
 * An entry of `$this->externallinks` is counted when its value is exactly
 * `null`; entries holding `true` or `false` are ignored.
 *
 * @return int number of external link entries whose status is `null`
 */
public function uncheckedlinkcount(): int
{
    $pending = 0;
    foreach ($this->externallinks as $linkstatus) {
        // strict null test: `false` means a dead link, not an unchecked one
        if ($linkstatus === null) {
            $pending++;
        }
    }
    return $pending;
}


// _________________________________ T O O L S ______________________________________

Expand Down
20 changes: 12 additions & 8 deletions app/class/Servicerender.php
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,14 @@ public function __construct(
AltoRouter $router,
Modelpage $pagemanager,
bool $externallinkblank = false,
bool $internallinkblank = false
bool $internallinkblank = false,
?Serviceurlchecker $urlchecker = null
) {
$this->router = $router;
$this->pagemanager = $pagemanager;
$this->externallinkblank = $externallinkblank;
$this->internallinkblank = $internallinkblank;
$this->urlchecker = $urlchecker;
}


Expand All @@ -76,15 +78,13 @@ public function __construct(
*
* @return string HTML render of the page
*/
public function render(Page $page, bool $checkurl): string
public function render(Page $page): string
{
$this->page = $page;

$this->urlchecker = $checkurl ? new Serviceurlchecker() : null;

$html = $this->gethmtl();

if ($checkurl) {
if (!is_null($this->urlchecker)) {
try {
$this->urlchecker->savecache();
} catch (RuntimeException $e) {
Expand Down Expand Up @@ -395,9 +395,13 @@ protected function htmlparser(string $html): string
$url = filter_var($href, FILTER_SANITIZE_URL);
$this->urls[$url] = null;
if ($this->urlchecker !== null) {
$dead = $this->urlchecker->isdead($url);
$classes[] = $dead ? 'dead' : 'ok';
$this->urls[$url] = !$dead;
try {
$dead = $this->urlchecker->isdead($url);
$classes[] = $dead ? 'dead' : 'ok';
$this->urls[$url] = !$dead;
} catch (RuntimeException $e) {
// Web search limit reached
}
}
} elseif (preg_match('~^([a-z0-9-_]+)((\/?#[a-z0-9-_]+)|(\/([\w\-\%\[\]\=\?\&]*)))?$~', $href, $out)) {
$classes[] = 'internal';
Expand Down
4 changes: 2 additions & 2 deletions app/class/Servicerenderv1.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ class Servicerenderv1 extends Servicerender
*
* @return string HTML render of the page
*/
public function render(Page $page, bool $checkurl): string
public function render(Page $page): string
{
if (!$page instanceof Pagev1) {
throw new DomainException('Page should be only Pagev1');
}
return parent::render($page, $checkurl);
return parent::render($page);
}

public function renderprimary(Page $page): string
Expand Down
4 changes: 2 additions & 2 deletions app/class/Servicerenderv2.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ class Servicerenderv2 extends Servicerender
*
* @return string HTML render of the page
*/
public function render(Page $page, bool $checkurl): string
public function render(Page $page): string
{
if (!$page instanceof Pagev2) {
throw new DomainException('Page should be only Pagev2');
}
return parent::render($page, $checkurl);
return parent::render($page);
}

public function renderprimary(Page $page): string
Expand Down
53 changes: 45 additions & 8 deletions app/class/Serviceurlchecker.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

namespace Wcms;

use RuntimeException;
use Wcms\Exception\Filesystemexception;

/**
Expand All @@ -12,21 +13,43 @@ class Serviceurlchecker
/** @var array[] $urls */
protected array $urls = [];

/** @var int $starttimestamp timestamp recorded when the object is built (in seconds) */
protected int $starttimestamp;

/** @var int $webchecktime time before stopping Web check (in seconds) */
protected int $webchecktime;

/** @var bool $cacheonly Limit URL checking to cache */
protected bool $cacheonly = false;

/** @var int MAX_BOUNCE limit of redirections to follow */
public const MAX_BOUNCE = 8;

/** @var int CACHE_EXPIRE_TIME in days */
public const CACHE_EXPIRE_TIME = 30;
public const CACHE_EXPIRE_TIME = 90;

/** @var null[] URL response code considered as not dead */
public const ACCEPTED_CODES = [
public const ACCEPTED_RESPONSE_CODES = [
200 => null,
401 => null,
403 => null,
];

public function __construct()
/**
* Tool that checks the status of URLs, first in the cache, then on the Web
* The cache expires according to the CACHE_EXPIRE_TIME constant
* A time limit has to be set to limit Web checking time
*
* @param int $webchecktime time allocated for checking URLs on the Web (in seconds)
* if set to `0`, checking on the Web is disabled: only the cache is used
*/
public function __construct(int $webchecktime)
{
$this->webchecktime = $webchecktime;
if ($webchecktime === 0) {
$this->cacheonly = true;
}
$this->starttimestamp = time();
try {
$urlfile = Fs::readfile(Model::URLS_FILE);
$this->urls = json_decode($urlfile, true);
Expand All @@ -36,15 +59,21 @@ public function __construct()
}

/**
* Check if URL is dead according to ACCEPTED CODES
* Check if URL is dead according to ACCEPTED_RESPONSE_CODES
*
* @throws RuntimeException If time limit is reached and URL status is expired or not stored in cache
*/
public function isdead(string $url): bool
{
if (!$this->iscached($url)) {
if ($this->iscachedandvalid($url)) {
return !key_exists($this->urls[$url]['response'], self::ACCEPTED_RESPONSE_CODES);
}
if (!$this->cacheonly && time() < ($this->starttimestamp + $this->webchecktime)) {
$this->urls[$url]['response'] = $this->getresponse($url);
$this->urls[$url]['timestamp'] = time();
return !key_exists($this->urls[$url]['response'], self::ACCEPTED_RESPONSE_CODES);
}
return !key_exists($this->urls[$url]['response'], self::ACCEPTED_CODES);
throw new RuntimeException('Impossible to give a status about this URL');
}

/**
Expand All @@ -70,12 +99,20 @@ protected function getresponse(string $url): int
return 0;
}

protected function iscached(string $url): bool
/**
* Check if the status of URL is cached and has not expired
* If cache is expired, the entry is deleted
*/
protected function iscachedandvalid(string $url): bool
{
if (!key_exists($url, $this->urls)) {
return false;
}
return !($this->urls[$url]['timestamp'] < (time() - self::CACHE_EXPIRE_TIME * 3600 * 24));
if (($this->urls[$url]['timestamp'] + self::CACHE_EXPIRE_TIME * 3600 * 24) < time()) {
unset($this->urls[$url]);
return false;
}
return true;
}

/**
Expand Down
7 changes: 5 additions & 2 deletions app/view/templates/home.php
Original file line number Diff line number Diff line change
Expand Up @@ -325,10 +325,13 @@ class="redirection"
<td class="linkto"><?= $opt->linktolink($item->linkto('array')) ?></td>
<?php endif ?>
<?php if ($columns['externallinks']) : ?>
<td class="linkto" title="<?= $item->externallinkstitle() ?>">
<td class="externallinks" title="<?= $item->externallinkstitle() ?>">
<?= $item->externallinks('sort') ?>
<?php if (!empty($deadlinks = $item->deadlinkcount())) : ?>
<span class="deadlinks"><?= $deadlinks ?></span>
<span class="deadlinkcount"><?= $deadlinks ?></span>
<?php endif ?>
<?php if (!empty($uncheckedlinkcount = $item->uncheckedlinkcount())) : ?>
<span class="uncheckedlinkcount"><?= $uncheckedlinkcount ?></span>
<?php endif ?>
</td>
<?php endif ?>
Expand Down
13 changes: 10 additions & 3 deletions assets/css/home.css
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,21 @@ table .favicon img {
max-width: 32px;
}

table .deadlinks {
background-color: red;
color: white;
table .deadlinkcount, table .uncheckedlinkcount {
border-radius: 15px;
display: inline-block;
height: 17px;
width: 17px;
text-align: center;
color: white;
}

table .deadlinkcount {
background-color: red;
}

table .uncheckedlinkcount {
background-color: rgb(65, 65, 65);
}

td.title {
Expand Down
2 changes: 1 addition & 1 deletion tests/Servicerenderv1Test.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public function renderTest(string $name): void
{
$pagedata = json_decode(file_get_contents(__DIR__ . "/data/Servicerenderv1Test/$name.json"), true);
$page = new Pagev1($pagedata);
$html = $this->renderengine->render($page, false);
$html = $this->renderengine->render($page);

$expected = __DIR__ . "/data/Servicerenderv1Test/$name.html";
$actual = self::$tmpdir . "/$name.html";
Expand Down
Loading

0 comments on commit d6b8fff

Please sign in to comment.