From 4c3e515d7ba45fe8e526b794b247c55ba4bac930 Mon Sep 17 00:00:00 2001 From: Alastair Weakley Date: Thu, 25 Jan 2024 14:53:59 +1100 Subject: [PATCH 01/26] Move keyring into database --- testproject/home/tests.py | 125 +++++++++++++++++- wagtailcache/cache.py | 27 ++-- wagtailcache/migrations/0001_initial.py | 48 +++++++ wagtailcache/migrations/__init__.py | 0 wagtailcache/models.py | 86 ++++++++++++ wagtailcache/settings.py | 1 + .../templates/wagtailcache/index.html | 9 +- wagtailcache/utils.py | 16 +++ wagtailcache/views.py | 5 +- 9 files changed, 290 insertions(+), 27 deletions(-) create mode 100644 wagtailcache/migrations/0001_initial.py create mode 100644 wagtailcache/migrations/__init__.py create mode 100644 wagtailcache/models.py create mode 100644 wagtailcache/utils.py diff --git a/testproject/home/tests.py b/testproject/home/tests.py index 88fd078..b525918 100644 --- a/testproject/home/tests.py +++ b/testproject/home/tests.py @@ -1,3 +1,4 @@ +import datetime import time from django.contrib.auth.models import User @@ -7,6 +8,7 @@ from django.test import modify_settings from django.test import override_settings from django.urls import reverse +from django.utils.timezone import now from wagtail import hooks from wagtail.models import PageViewRestriction @@ -19,6 +21,8 @@ from wagtailcache.cache import CacheControl from wagtailcache.cache import Status from wagtailcache.cache import clear_cache +from wagtailcache.cache import Status +from wagtailcache.models import KeyringItem from wagtailcache.settings import wagtailcache_settings @@ -510,14 +514,32 @@ def test_admin_clearcache(self): def test_cache_keyring(self): # Check if keyring is not present - self.assertEqual(self.cache.get("keyring"), None) + self.assertEqual(KeyringItem.objects.count(), 0) # Get should hit cache. self.get_miss(self.page_cachedpage.get_url()) + self.assertEqual(KeyringItem.objects.count(), 1) # Get first key from keyring - key = next(iter(self.cache.get("keyring"))) url = "http://%s%s" % ("testserver", self.page_cachedpage.get_url()) + keyring_item = KeyringItem.objects.active_for_urls(url).first() # Compare Keys - self.assertEqual(key, url) + self.assertEqual(keyring_item.url, url) + + @override_settings(WAGTAIL_CACHE_BACKEND="one_second") + def test_cache_keyring_no_uri_key_duplication(self): + # First get to populate keyring + self.get_miss(self.page_cachedpage.get_url()) + # Wait a short time + time.sleep(0.5) + # Fetch a different page + self.get_miss(self.page_wagtailpage.get_url()) + # Wait until the first page is expired, but not the keyring + time.sleep(0.6) + # Fetch the first page again + self.get_miss(self.page_cachedpage.get_url()) + # Check the keyring does not contain duplicate uri_keys + url = "http://%s%s" % ("testserver", self.page_cachedpage.get_url()) + keyring = self.cache.get("keyring") + self.assertEqual(len(keyring.get(url, [])), 1) @override_settings(WAGTAIL_CACHE_BACKEND="one_second") def test_cache_keyring_no_uri_key_duplication(self): @@ -684,3 +706,100 @@ def test_response_hook_any(self): self.assertEqual(hook_fns, [hook_any]) # The page should be cached normally due to hook returning garbage. self.test_page_hit() + + # ---- MODELS -------------------------------------------------------------- + def test_keyring_update_or_create(self): + expiry = now() + datetime.timedelta(hours=1) + key = "abc123" + url = "https://example.com/" + + KeyringItem.objects.set( + expiry=expiry, + key=key, + url=url, + ) + self.assertEqual(KeyringItem.objects.count(), 1) + self.assertEqual(KeyringItem.objects.first().url, url) + + expiry2 = now() + datetime.timedelta(hours=1) + KeyringItem.objects.set( + expiry=expiry2, + key=key, + url=url, + ) + self.assertEqual(KeyringItem.objects.count(), 1) + self.assertEqual(KeyringItem.objects.first().expiry, expiry2) + + def test_delete_expired(self): + """ + Cache items expire by themselves, so we only need to actively + delete database items + """ + expiry1 = now() + datetime.timedelta(seconds=1) + expiry2 = now() + datetime.timedelta(seconds=2) + used_keys = [] + + for exp in [expiry1, expiry2]: + key = f"key-{exp}" + url = f"https://example.com/{exp}" + KeyringItem.objects.set( + expiry=exp, + key=key, + url=url, + ) + # Item should not expire + self.cache.set(key, url, 100) + used_keys.append(key) + self.assertEqual(KeyringItem.objects.count(), 2) + time.sleep(1) + KeyringItem.objects.clear_expired() + self.assertEqual(KeyringItem.objects.count(), 1) + + # Cache items remain + for key in used_keys: + self.assertTrue(self.cache.get(key)) + + @override_settings(WAGTAIL_CACHE_BATCH_SIZE=2) + def test_bulk_delete(self): + """ + Bulk delete removes cache items and database items that refer to them + """ + timeout = 10 + expiry = now() + datetime.timedelta(seconds=timeout) + keys = [f"key-{counter}" for counter in range(8)] + + for key in keys: + url = "https://example.com/" + KeyringItem.objects.set( + expiry=expiry, + key=key, + url=url, + ) + self.cache.set(key, url, timeout) + + KeyringItem.objects.bulk_delete_cache_keys(keys[:4]) + + for key in keys[:4]: + self.assertFalse(KeyringItem.objects.filter(key=key).exists()) + self.assertFalse(self.cache.get(key)) + + for key in keys[4:]: + self.assertTrue(KeyringItem.objects.filter(key=key).exists()) + self.assertTrue(self.cache.get(key)) + + def test_active_for_urls(self): + past_expiry = now() - datetime.timedelta(seconds=1) + future_expiry = now() + datetime.timedelta(seconds=1) + url = "https://example.com" + + KeyringItem.objects.set( + expiry=past_expiry, + key="key", + url=url, + ) + KeyringItem.objects.set( + expiry=future_expiry, + key="key-2", + url=url, + ) + self.assertEqual(KeyringItem.objects.active_for_urls(url).count(), 1) diff --git a/wagtailcache/cache.py b/wagtailcache/cache.py index 0d92c75..5a0b519 100644 --- a/wagtailcache/cache.py +++ b/wagtailcache/cache.py @@ -3,6 +3,7 @@ """ import logging +import datetime import re from enum import Enum from functools import wraps @@ -24,8 +25,10 @@ from django.utils.cache import learn_cache_key from django.utils.cache import patch_response_headers from django.utils.deprecation import MiddlewareMixin +from django.utils.timezone import now from wagtail import hooks +from wagtailcache.models import KeyringItem from wagtailcache.settings import wagtailcache_settings @@ -378,25 +381,15 @@ def clear_cache(urls: List[str] = []) -> None: return _wagcache = caches[wagtailcache_settings.WAGTAIL_CACHE_BACKEND] - if urls and "keyring" in _wagcache: - keyring = _wagcache.get("keyring") - # Check the provided URL matches a key in our keyring. - matched_urls = [] - for regex in urls: - for key in keyring: - if re.match(regex, key): - matched_urls.append(key) - # If it matches, delete each entry from the cache, - # and delete the URL from the keyring. - for url in matched_urls: - entries = keyring.get(url, []) - for cache_key in entries: - _wagcache.delete(cache_key) - del keyring[url] - # Save the keyring. - _wagcache.set("keyring", keyring) + if urls: + active_keys = KeyringItem.objects.active_for_urls(urls).values_list( + "key", flat=True + ) + # Delete the keys from the cache and the keyring + KeyringItem.objects.bulk_delete_cache_keys(active_keys) # Clears the entire cache backend used by wagtail-cache. else: + KeyringItem.objects.all().delete() _wagcache.clear() diff --git a/wagtailcache/migrations/0001_initial.py b/wagtailcache/migrations/0001_initial.py new file mode 100644 index 0000000..c29c05f --- /dev/null +++ b/wagtailcache/migrations/0001_initial.py @@ -0,0 +1,48 @@ +# Generated by Django 4.1.9 on 2024-01-25 03:33 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + initial = True + + dependencies: list[tuple[str, str]] = [] + + operations = [ + migrations.CreateModel( + name="KeyringItem", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("expiry", models.DateTimeField()), + ("key", models.CharField(max_length=512)), + ("url", models.URLField()), + ], + options={ + "ordering": ["url"], + }, + ), + migrations.AddIndex( + model_name="keyringitem", + index=models.Index(fields=["expiry"], name="wagtailcach_expiry_b9702b_idx"), + ), + migrations.AddIndex( + model_name="keyringitem", + index=models.Index(fields=["key"], name="wagtailcach_key_0c2934_idx"), + ), + migrations.AddIndex( + model_name="keyringitem", + index=models.Index(fields=["url"], name="wagtailcach_url_04699f_idx"), + ), + migrations.AlterUniqueTogether( + name="keyringitem", + unique_together={("url", "key")}, + ), + ] diff --git a/wagtailcache/migrations/__init__.py b/wagtailcache/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/wagtailcache/models.py b/wagtailcache/models.py new file mode 100644 index 0000000..525814a --- /dev/null +++ b/wagtailcache/models.py @@ -0,0 +1,86 @@ +from django.core.cache import caches +from django.db import models +from django.db.models import Q +from django.utils.timezone import now + +from wagtailcache.settings import wagtailcache_settings +from wagtailcache.utils import batched + + +class KeyringItemManager(models.Manager): + def __init__(self): + super().__init__() + self._wagcache = caches[wagtailcache_settings.WAGTAIL_CACHE_BACKEND] + + def set(self, url, key, expiry) -> models.Model: + """ + Create or update a keyring item, clearing expired items too. + """ + item, _ = self.update_or_create( + defaults={"expiry": expiry}, + url=url, + key=key, + ) + self.clear_expired() + return item + + def bulk_delete_cache_keys(self, keys: list[str]) -> None: + """ + Bulk delete the keys that exist, in batches + """ + existing_keys = self.filter(key__in=keys) + # Delete from cache + for key_batch in batched( + existing_keys.values_list("key", flat=True), + wagtailcache_settings.WAGTAIL_CACHE_BATCH_SIZE, + ): + self._wagcache.delete_many(key_batch) + # Delete from database + existing_keys.delete() + + def clear_expired(self) -> None: + """ + Clear all items whose expiry has passed. + """ + self.filter(expiry__lt=now()).delete() + + def active(self): + return self.filter(expiry__gt=now()) + + def active_for_urls(self, urls): + if urls is None: + urls = [] + if not isinstance(urls, (list, tuple)): + urls = list(urls) + qs = self.active() + if not urls: + return qs + filter_set = Q(url__regex=urls[0]) + for url in urls[1:]: + filter_set = filter_set | Q(url__regex=url) + return qs.filter(filter_set) + + +class KeyringItem(models.Model): + """ + KeyringItems relate the URL of a page on the site to the key of an item + in the cache. + """ + + expiry = models.DateTimeField() + key = models.CharField(max_length=512) + url = models.URLField() + + objects = KeyringItemManager() + + class Meta: + ordering = ["url"] + indexes = [ + models.Index(fields=["expiry"]), + models.Index(fields=["key"]), + models.Index(fields=["url"]), + ] + unique_together = [["url", "key"]] + + def __str__(self): + return f"{self.url} -> {self.key} (Expires: {self.expiry})" diff --git a/wagtailcache/settings.py b/wagtailcache/settings.py index d56c9a6..7e01140 100644 --- a/wagtailcache/settings.py +++ b/wagtailcache/settings.py @@ -10,6 +10,7 @@ class _DefaultSettings: WAGTAIL_CACHE = True WAGTAIL_CACHE_BACKEND = "default" + WAGTAIL_CACHE_BATCH_SIZE = 100 WAGTAIL_CACHE_HEADER = "X-Wagtail-Cache" WAGTAIL_CACHE_IGNORE_COOKIES = True WAGTAIL_CACHE_IGNORE_QS = [ diff --git a/wagtailcache/templates/wagtailcache/index.html b/wagtailcache/templates/wagtailcache/index.html index 58aaca0..31b53a3 100644 --- a/wagtailcache/templates/wagtailcache/index.html +++ b/wagtailcache/templates/wagtailcache/index.html @@ -41,12 +41,13 @@

{% trans "Contents" %}

{% trans "Note that 301/302 redirects and 404s may also be cached." %}