Skip to content

Commit

Permalink
incremental expire for negative responses
Browse files Browse the repository at this point in the history
To avoid false negatives, the expiry time starts small and doubles each time the check fails again since the last try.
The URL cache now stores a timestamp and an expiry time. (The cache needs to be flushed when updating from a previous commit.)
+ fixed: multi-render does not check URLs anymore
  • Loading branch information
vincent-peugnet committed Nov 30, 2024
1 parent 45d9c61 commit c5c438a
Showing 1 changed file with 30 additions and 27 deletions.
57 changes: 30 additions & 27 deletions app/class/Serviceurlchecker.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,6 @@ class Serviceurlchecker
/** @var int MAX_BOUNCE limit of redirections to follow */
public const MAX_BOUNCE = 8;

/** @var int OK_CACHE_EXPIRE_TIME in days */
public const OK_CACHE_EXPIRE_TIME = 90;

/** @var int DEAD_CACHE_EXPIRE_TIME in minutes */
public const DEAD_CACHE_EXPIRE_TIME = 1;

/** @var null[] URL response code considered as not dead */
public const ACCEPTED_RESPONSE_CODES = [
200 => null,
Expand Down Expand Up @@ -73,11 +67,11 @@ public function __construct(int $timeout = 0)
public function check(string $url): bool
{
if ($this->iscachedandvalid($url)) {
return $this->isalive($this->urls[$url]['response']);
} else {
return $this->responseisaccepted($this->urls[$url]['response']);
} elseif (!$this->cacheonly) {
$this->queue[] = $url;
throw new RuntimeException('no status about this URL');
}
throw new RuntimeException('no status about this URL');
}

/**
Expand Down Expand Up @@ -163,27 +157,36 @@ public function processqueue(): int
foreach ($curlhandles as $url => $curlhandle) {
$curlerror = curl_errno($curlhandle);

if ($curlerror === CURLE_OK) {
$newurls[$url] = [
'response' => curl_getinfo($curlhandle, CURLINFO_HTTP_CODE),
'timestamp' => time(),
'expire' => time() + self::OK_CACHE_EXPIRE_TIME * 24 * 3600,
];
} elseif ($curlerror === CURLE_OPERATION_TIMEDOUT && count($this->queue) > 10) {
if ($curlerror === CURLE_OPERATION_TIMEDOUT && count($this->queue) > 10) {
// if the queue was big, chances are that the timeout is due to curl saturation
// consider the link as unchecked
continue;
}

if ($curlerror !== CURLE_OK) {
$response = $curlerror;
} else {
if (key_exists($url, $this->urls) && $this->isalive($this->urls[$url]['response'])) {
$expire = time() + (time() - $this->urls[$url]['timestamp']) * 2;
} else {
$expire = time() + self::DEAD_CACHE_EXPIRE_TIME * 60;
}
$newurls[$url] = [
'response' => $curlerror,
'timestamp' => time(),
'expire' => $expire,
];
$response = curl_getinfo($curlhandle, CURLINFO_HTTP_CODE);
}

if ($this->responseisaccepted($response) || $response === 404) {
$expire = time() + 80 * 24 * 3600 + rand(0, 40 * 24 * 3600); // 100 +-20 days
} elseif (key_exists($url, $this->urls) && !$this->responseisaccepted($this->urls[$url]['response'])) {
// If it was already an error before: expire in twice the time since previous timestamp
$expire = time() + (time() - $this->urls[$url]['timestamp']) * 2;
} elseif ($response === 429) { // Too many request: let's expire in one hour to avoid another one
$expire = time() + 3600;
} elseif ($response > 200) { // default to ten minutes for other error codes
$expire = time() + 600;
} else { // for curl error : expire in one minute
$expire = time() + 60;
}

$newurls[$url] = [
'response' => $response,
'timestamp' => time(),
'expire' => $expire,
];
}

curl_multi_close($multihandle);
Expand All @@ -198,7 +201,7 @@ public function processqueue(): int
*
* @return bool Indicates whether the response code means the URL is alive or not.
*/
public static function isalive(int $response): bool
public static function responseisaccepted(int $response): bool
{
return key_exists($response, self::ACCEPTED_RESPONSE_CODES);
}
Expand Down

0 comments on commit c5c438a

Please sign in to comment.