Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Negotiate cache_expiration_time #26

Merged
merged 15 commits into from
Sep 20, 2024
79 changes: 79 additions & 0 deletions src/HTTP/Utils.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
<?php

declare(strict_types=1);

namespace SimplePie\HTTP;

/**
* HTTP util functions
* FreshRSS
* @internal
*/
final class Utils
{
    /**
     * Extracts `max-age` from the `Cache-Control` HTTP header.
     *
     * The directive name is matched case-insensitively, and both the token form
     * (`max-age=60`) and the quoted-string form (`max-age="60"`) are accepted.
     *
     * @param array<string,mixed> $http_headers HTTP headers of the response, indexed by lower-case header name
     * @return int|null The `max-age` value or `null` if not found (or if the header is not a string)
     *
     * FreshRSS
     */
    public static function get_http_max_age(array $http_headers): ?int
    {
        $cache_control = $http_headers['cache-control'] ?? null;
        if (is_string($cache_control) && preg_match('/\bmax-age="?(\d+)\b/i', $cache_control, $matches)) {
            return (int) $matches[1];
        }
        return null;
    }

    /**
     * Negotiate the cache expiration time based on the HTTP response headers.
     * Return the cache expiration time in number of seconds since the Unix Epoch, accounting for:
     * - `Cache-Control: no-store` will return `time() + $cache_duration_min - 3`
     * - `Cache-Control: no-cache` will return `time() + $cache_duration_min`
     * - `Cache-Control: must-revalidate` will set `$cache_duration` to `$cache_duration_min`
     * - `Cache-Control: max-age` minus `Age`, raised to at least `$cache_duration` and capped at `$cache_duration_max`
     * - `Expires` like `Cache-Control: max-age` but only if the latter is absent
     * - otherwise `time() + $cache_duration`
     *
     * Directive names are compared case-insensitively. Header values that are not strings are ignored.
     *
     * @param array<string,mixed> $http_headers HTTP headers of the response, indexed by lower-case header name
     * @param int $cache_duration Desired cache duration in seconds, potentially overridden by HTTP response headers
     * @param int $cache_duration_min Minimal cache duration (in seconds), overriding HTTP response headers `Cache-Control` and `Expires`
     * @param int $cache_duration_max Maximal cache duration (in seconds), overriding HTTP response headers `Cache-Control: max-age` and `Expires`
     * @return int The negotiated cache expiration time in seconds since the Unix Epoch
     *
     * FreshRSS
     */
    public static function negociate_cache_expiration_time(array $http_headers, int $cache_duration, int $cache_duration_min, int $cache_duration_max): int
    {
        // Sample the clock once so that every bound below is relative to the same instant.
        $now = time();

        $cache_control = $http_headers['cache-control'] ?? '';
        if (is_string($cache_control) && $cache_control !== '') {
            if (preg_match('/\bno-store\b/i', $cache_control)) {
                return $now + $cache_duration_min - 3; // -3 to distinguish from no-cache if needed
            }
            if (preg_match('/\bno-cache\b/i', $cache_control)) {
                return $now + $cache_duration_min;
            }
            if (preg_match('/\bmust-revalidate\b/i', $cache_control)) {
                $cache_duration = $cache_duration_min;
            }
            $max_age = self::get_http_max_age($http_headers);
            if ($max_age !== null) {
                $age = $http_headers['age'] ?? '';
                if (is_numeric($age)) {
                    // A (malformed) negative Age must not extend the freshness lifetime.
                    $max_age -= max(0, (int) $age);
                }
                return $now + min(max($max_age, $cache_duration), $cache_duration_max);
            }
        }

        $expires = $http_headers['expires'] ?? '';
        if (is_string($expires) && $expires !== '') {
            $expire_date = \SimplePie\Misc::parse_date($expires);
            if ($expire_date !== false) {
                return min(max($expire_date, $now + $cache_duration), $now + $cache_duration_max);
            }
        }

        return $now + $cache_duration;
    }
}
73 changes: 61 additions & 12 deletions src/SimplePie.php
Original file line number Diff line number Diff line change
Expand Up @@ -509,12 +509,28 @@ class SimplePie
public $force_cache_fallback = false;

/**
* @var int Cache duration (in seconds)
* @var int Cache duration (in seconds), but may be overridden by HTTP response headers (FreshRSS)
* @see SimplePie::set_cache_duration()
* @access private
*/
public $cache_duration = 3600;

/**
* @var int Minimal cache duration (in seconds), overriding HTTP response headers `Cache-Control` and `Expires`
* @see SimplePie::set_cache_duration()
* @access private
* FreshRSS
*/
public $cache_duration_min = 60;

/**
* @var int Maximal cache duration (in seconds), overriding HTTP response headers `Cache-Control` and `Expires`
* @see SimplePie::set_cache_duration()
* @access private
* FreshRSS
*/
public $cache_duration_max = 86400;

/**
* @var int Auto-discovery cache duration (in seconds)
* @see SimplePie::set_autodiscovery_cache_duration()
Expand Down Expand Up @@ -989,12 +1005,26 @@ public function force_cache_fallback(bool $enable = false)
* Set the length of time (in seconds) that the contents of a feed will be
* cached
*
* @param int $seconds The feed content cache duration
* FreshRSS: The cache is (partially) HTTP compliant; the rules are described in `\SimplePie\HTTP\Utils::negociate_cache_expiration_time()`.
*
* @param int $seconds The feed content cache duration, which may be overridden by HTTP response headers
* @param int $min The minimum cache duration (default: 60s), overriding HTTP response headers `Cache-Control` and `Expires`
* @param int $max The maximum cache duration (default: 24h), overriding HTTP response headers `Cache-Control` and `Expires`
* @return void
*/
public function set_cache_duration(int $seconds = 3600, ?int $min = null, ?int $max = null)
{
    // Clamp once, so that the bounds below are derived from the same
    // non-negative value that is stored as the cache duration. Otherwise a
    // negative $seconds would leave a negative minimum behind.
    $seconds = max(0, $seconds);
    $this->cache_duration = $seconds;

    if ($min !== null) { // FreshRSS
        // An explicit minimum is kept within [0, $seconds].
        $this->cache_duration_min = min(max(0, $min), $seconds);
    } elseif ($this->cache_duration_min > $seconds) {
        // No explicit minimum: only lower the current one if it exceeds the duration.
        $this->cache_duration_min = $seconds;
    }

    if ($max !== null) { // FreshRSS
        // An explicit maximum can never be below the duration itself.
        $this->cache_duration_max = max($seconds, $max);
    } elseif ($this->cache_duration_max < $seconds) {
        // No explicit maximum: only raise the current one if it is below the duration.
        $this->cache_duration_max = $seconds;
    }
}

/**
Expand Down Expand Up @@ -1851,7 +1881,7 @@ public function init()
$this->data['hash'] = $this->data['hash'] ?? $this->clean_hash($this->raw_data); // FreshRSS

// Cache the file if caching is enabled
$this->data['cache_expiration_time'] = $this->cache_duration + time();
$this->data['cache_expiration_time'] = \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max);

if ($cache && !$cache->set_data($this->get_cache_filename($this->feed_url), $this->data, $this->cache_duration)) {
trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
Expand Down Expand Up @@ -1972,8 +2002,10 @@ protected function fetch_data(&$cache)
$this->status_code = 0;

if ($this->force_cache_fallback) {
$this->data['cache_expiration_time'] = $this->cache_duration + time(); // FreshRSS
$cache->set_data($cacheKey, $this->data, $this->cache_duration);
$this->data['cache_expiration_time'] = \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max); // FreshRSS
if (!$cache->set_data($cacheKey, $this->data, $this->cache_duration)) { // FreshRSS
trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
}

return true;
}
Expand All @@ -1986,13 +2018,28 @@ protected function fetch_data(&$cache)
// is still valid.
$this->raw_data = false;
if (isset($file)) { // FreshRSS
$old_cache_control = $this->data['headers']['cache-control'] ?? '';
$old_max_age = \SimplePie\HTTP\Utils::get_http_max_age($this->data['headers']);

// Update cache metadata
$this->data['cache_expiration_time'] = $this->cache_duration + time();
$this->data['headers'] = array_map(function (array $values): string {
return implode(',', $values);
}, $file->get_headers());

// Workaround for buggy servers returning wrong cache-control headers for 304 responses
if ($old_max_age !== null) {
$new_max_age = \SimplePie\HTTP\Utils::get_http_max_age($this->data['headers']);
if ($new_max_age === null || $new_max_age > $old_max_age) {
// Allow servers to return a shorter cache duration for 304 responses, but not longer
$this->data['headers']['cache-control'] = $old_cache_control;
}
}

$this->data['cache_expiration_time'] = \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max);
}
if (!$cache->set_data($cacheKey, $this->data, $this->cache_duration)) { // FreshRSS
trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
}
$cache->set_data($cacheKey, $this->data, $this->cache_duration);

return true;
}
Expand All @@ -2001,11 +2048,13 @@ protected function fetch_data(&$cache)
$hash = $this->clean_hash($file->get_body_content());
if (($this->data['hash'] ?? null) === $hash) {
// Update cache metadata
$this->data['cache_expiration_time'] = $this->cache_duration + time();
$this->data['headers'] = array_map(function (array $values): string {
return implode(',', $values);
}, $file->get_headers());
$cache->set_data($cacheKey, $this->data, $this->cache_duration);
$this->data['cache_expiration_time'] = \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max);
if (!$cache->set_data($cacheKey, $this->data, $this->cache_duration)) {
trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
}

return true; // Content unchanged even though server did not send a 304
} else {
Expand Down Expand Up @@ -2138,7 +2187,7 @@ protected function fetch_data(&$cache)
'url' => $this->feed_url,
'feed_url' => $file->get_final_requested_uri(),
'build' => Misc::get_build(),
'cache_expiration_time' => $this->cache_duration + time(),
'cache_expiration_time' => \SimplePie\HTTP\Utils::negociate_cache_expiration_time($this->data['headers'] ?? [], $this->cache_duration, $this->cache_duration_min, $this->cache_duration_max), // FreshRSS
'cache_version' => self::CACHE_VERSION, // FreshRSS
'hash' => empty($hash) ? $this->clean_hash($file->get_body_content()) : $hash, // FreshRSS
];
Expand Down
Loading