Skip to content

Commit

Permalink
✨(documents) allow retrieving versions (list and detail)
Browse files Browse the repository at this point in the history
Versions are retrieved directly from object storage and served on API
endpoints. We make sure a user who is given access to a document will
only see versions that were created after s.he gained access.
  • Loading branch information
sampaccoud committed Apr 22, 2024
1 parent 565660b commit 47012db
Show file tree
Hide file tree
Showing 7 changed files with 765 additions and 23 deletions.
11 changes: 10 additions & 1 deletion src/backend/core/api/permissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@

from rest_framework import permissions

ACTION_FOR_METHOD_TO_PERMISSION = {
"versions_detail": {"DELETE": "versions_destroy", "GET": "versions_retrieve"}
}


class IsAuthenticated(permissions.BasePermission):
"""
Expand Down Expand Up @@ -60,4 +64,9 @@ class AccessPermission(permissions.BasePermission):
def has_object_permission(self, request, view, obj):
"""Check permission for a given object."""
abilities = obj.get_abilities(request.user)
return abilities.get(view.action, False)
action = view.action
try:
action = ACTION_FOR_METHOD_TO_PERMISSION[view.action][request.method]
except KeyError:
pass
return abilities.get(action, False)
60 changes: 59 additions & 1 deletion src/backend/core/api/viewsets.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""API endpoints"""
import json
from io import BytesIO

from django.contrib.postgres.aggregates import ArrayAgg
Expand All @@ -7,8 +8,9 @@
Q,
Subquery,
)
from django.http import FileResponse
from django.http import FileResponse, Http404

from botocore.exceptions import ClientError
from rest_framework import (
decorators,
exceptions,
Expand Down Expand Up @@ -291,6 +293,62 @@ class DocumentViewSet(
resource_field_name = "document"
queryset = models.Document.objects.all()

@decorators.action(detail=True, methods=["get"], url_path="versions")
def versions_list(self, request, *args, **kwargs):
"""
Return the document's versions but only those created after the user got access
to the document
"""
document = self.get_object()
from_datetime = min(
access.created_at
for access in document.accesses.filter(
Q(user=request.user) | Q(team__in=request.user.get_teams()),
)
)
return drf_response.Response(
document.get_versions_slice(from_datetime=from_datetime)
)

@decorators.action(
detail=True,
methods=["get", "delete"],
url_path="versions/(?P<version_id>[0-9a-f-]{36})",
)
# pylint: disable=unused-argument
def versions_detail(self, request, pk, version_id, *args, **kwargs):
"""Custom action to retrieve a specific version of a document"""
document = self.get_object()

try:
response = document.get_content_response(version_id=version_id)
except (FileNotFoundError, ClientError) as err:
raise Http404 from err

# Don't let users access versions that were created before they were given access
# to the document
from_datetime = min(
access.created_at
for access in document.accesses.filter(
Q(user=request.user) | Q(team__in=request.user.get_teams()),
)
)
if response["LastModified"] < from_datetime:
raise Http404

if request.method == "DELETE":
response = document.delete_version(version_id)
return drf_response.Response(
status=response["ResponseMetadata"]["HTTPStatusCode"]
)

return drf_response.Response(
{
"content": json.loads(response["Body"].read()),
"last_modified": response["LastModified"],
}
)


class DocumentAccessViewSet(
ResourceAccessViewsetMixin,
Expand Down
103 changes: 98 additions & 5 deletions src/backend/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import frontmatter
import markdown
from botocore.exceptions import ClientError
from timezone_field import TimeZoneField
from weasyprint import CSS, HTML
from weasyprint.text.fonts import FontConfiguration
Expand Down Expand Up @@ -264,16 +265,23 @@ class Meta:
def __str__(self):
return self.title

@property
def file_key(self):
"""Key of the object storage file to which the document content is stored"""
if not self.pk:
return None
return str(self.pk)

@property
def content(self):
"""Return the json content from object storage if available"""
if self._content is None and self.id:
try:
# Load content from object storage
with default_storage.open(str(self.id)) as f:
self._content = json.load(f)
except FileNotFoundError:
response = self.get_content_response()
except (FileNotFoundError, ClientError):
pass
else:
self._content = json.loads(response["Body"].read())
return self._content

@content.setter
Expand All @@ -285,12 +293,18 @@ def content(self, content):
raise ValueError("content should be a json object.")
self._content = content

def get_content_response(self, version_id=""):
"""Get the content in a specific version of the document"""
return default_storage.connection.meta.client.get_object(
Bucket=default_storage.bucket_name, Key=self.file_key, VersionId=version_id
)

def save(self, *args, **kwargs):
"""Write content to object storage only if _content has changed."""
super().save(*args, **kwargs)

if self._content:
file_key = str(self.pk)
file_key = self.file_key
bytes_content = json.dumps(self._content).encode("utf-8")

if default_storage.exists(file_key):
Expand All @@ -307,6 +321,81 @@ def save(self, *args, **kwargs):
content_file = ContentFile(bytes_content)
default_storage.save(file_key, content_file)

def get_versions_slice(
self, from_version_id="", from_datetime=None, page_size=None
):
"""Get document versions from object storage with pagination and starting conditions"""
# /!\ Trick here /!\
# The "KeyMarker" and "VersionIdMarker" fields must either be both set or both not set.
# The error we get otherwise is not helpful at all.
token = {}
if from_version_id:
token.update(
{"KeyMarker": self.file_key, "VersionIdMarker": from_version_id}
)

if from_datetime:
response = default_storage.connection.meta.client.list_object_versions(
Bucket=default_storage.bucket_name,
Prefix=self.file_key,
MaxKeys=settings.S3_VERSIONS_PAGE_SIZE,
**token,
)

# Find the first version after the given datetime
version = None
for version in response.get("Versions", []):
if version["LastModified"] >= from_datetime:
token = {
"KeyMarker": self.file_key,
"VersionIdMarker": version["VersionId"],
}
break
else:
if version is None or version["LastModified"] < from_datetime:
if response["NextVersionIdMarker"]:
return self.get_versions_slice(
from_version_id=response["NextVersionIdMarker"],
page_size=settings.S3_VERSIONS_PAGE_SIZE,
from_datetime=from_datetime,
)
return {
"next_version_id_marker": "",
"is_truncated": False,
"versions": [],
}

response = default_storage.connection.meta.client.list_object_versions(
Bucket=default_storage.bucket_name,
Prefix=self.file_key,
MaxKeys=min(page_size, settings.S3_VERSIONS_PAGE_SIZE)
if page_size
else settings.S3_VERSIONS_PAGE_SIZE,
**token,
)
return {
"next_version_id_marker": response["NextVersionIdMarker"],
"is_truncated": response["IsTruncated"],
"versions": [
{
key_snake: version[key_camel]
for key_camel, key_snake in [
("ETag", "etag"),
("IsLatest", "is_latest"),
("LastModified", "last_modified"),
("VersionId", "version_id"),
]
}
for version in response.get("Versions", [])
],
}

def delete_version(self, version_id):
"""Delete a version from object storage given its version id"""
return default_storage.connection.meta.client.delete_object(
Bucket=default_storage.bucket_name, Key=self.file_key, VersionId=version_id
)

def get_abilities(self, user):
"""
Compute and return abilities for a given user on the document.
Expand All @@ -316,9 +405,13 @@ def get_abilities(self, user):
set(roles).intersection({RoleChoices.OWNER, RoleChoices.ADMIN})
)
can_get = self.is_public or bool(roles)
can_get_versions = bool(roles)

return {
"destroy": RoleChoices.OWNER in roles,
"versions_destroy": is_owner_or_admin,
"versions_list": can_get_versions,
"versions_retrieve": can_get_versions,
"manage_accesses": is_owner_or_admin,
"update": is_owner_or_admin,
"partial_update": is_owner_or_admin,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ def test_api_documents_retrieve_anonymous_public():
"partial_update": False,
"retrieve": True,
"update": False,
"versions_destroy": False,
"versions_list": False,
"versions_retrieve": False,
},
"accesses": [],
"title": document.title,
Expand Down Expand Up @@ -66,6 +69,9 @@ def test_api_documents_retrieve_authenticated_unrelated_public():
"partial_update": False,
"retrieve": True,
"update": False,
"versions_destroy": False,
"versions_list": False,
"versions_retrieve": False,
},
"accesses": [],
"title": document.title,
Expand Down
Loading

0 comments on commit 47012db

Please sign in to comment.