Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

♻️(backend) allow uploading more types of attachments #309

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ and this project adheres to

## Added

- ✨(backend) allow uploading more types of attachments #309
- ✨(backend) add name fields to the user synchronized with OIDC #301
- ✨(ci) add security scan #291
- ✨(frontend) Activate versions feature #240
Expand Down
11 changes: 6 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,15 @@ ENV PYTHONUNBUFFERED=1

# Install required system libs
RUN apk add \
gettext \
cairo \
libffi-dev \
file \
font-noto \
font-noto-emoji \
gettext \
gdk-pixbuf \
pango \
libffi-dev \
pandoc \
font-noto-emoji \
font-noto \
pango \
shared-mime-info

# Copy entrypoint
Expand Down
35 changes: 28 additions & 7 deletions src/backend/core/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from django.db.models import Q
from django.utils.translation import gettext_lazy as _

import magic
from rest_framework import exceptions, serializers

from core import models
Expand Down Expand Up @@ -218,16 +219,36 @@ def validate_file(self, file):
f"File size exceeds the maximum limit of {max_size:d} MB."
)

# Validate file type
mime_type, _ = mimetypes.guess_type(file.name)
if mime_type not in settings.DOCUMENT_IMAGE_ALLOWED_MIME_TYPES:
mime_types = ", ".join(settings.DOCUMENT_IMAGE_ALLOWED_MIME_TYPES)
raise serializers.ValidationError(
f"File type '{mime_type:s}' is not allowed. Allowed types are: {mime_types:s}"
)
extension = file.name.rpartition(".")[-1] if "." in file.name else None

# Read the first few bytes to determine the MIME type accurately
mime = magic.Magic(mime=True)
magic_mime_type = mime.from_buffer(file.read(1024))
file.seek(0) # Reset file pointer to the beginning after reading

if magic_mime_type in settings.DOCUMENT_UNSAFE_MIME_TYPES:
self.context["is_unsafe"] = True
sampaccoud marked this conversation as resolved.
Show resolved Hide resolved

extension_mime_type, _ = mimetypes.guess_type(file.name)

# Try guessing a coherent extension from the mimetype
if extension_mime_type != magic_mime_type:
if guessed_ext := mimetypes.guess_extension(magic_mime_type):
extension = guessed_ext[1:]

if extension is None:
raise serializers.ValidationError("Could not determine file extension.")

self.context["expected_extension"] = extension

return file

def validate(self, attrs):
"""Override validate to add the computed extension to validated_data."""
attrs["expected_extension"] = self.context.get("expected_extension")
sampaccoud marked this conversation as resolved.
Show resolved Hide resolved
attrs["is_unsafe"] = self.context.get("is_unsafe")
sampaccoud marked this conversation as resolved.
Show resolved Hide resolved
return attrs


class TemplateSerializer(BaseResourceSerializer):
"""Serialize templates."""
Expand Down
18 changes: 12 additions & 6 deletions src/backend/core/api/viewsets.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""API endpoints"""

import os
import re
import uuid
from urllib.parse import urlparse
Expand Down Expand Up @@ -476,15 +475,22 @@ def attachment_upload(self, request, *args, **kwargs):
return drf_response.Response(
serializer.errors, status=status.HTTP_400_BAD_REQUEST
)
# Extract the file extension from the original filename
file = serializer.validated_data["file"]
extension = os.path.splitext(file.name)[1]

# Generate a generic yet unique filename to store the image in object storage
file_id = uuid.uuid4()
key = f"{document.key_base}/{ATTACHMENTS_FOLDER:s}/{file_id!s}{extension:s}"
extension = serializer.validated_data["expected_extension"]
key = f"{document.key_base}/{ATTACHMENTS_FOLDER:s}/{file_id!s}.{extension:s}"

# Prepare metadata for storage
metadata = {"Metadata": {"owner": str(request.user.id)}}
if serializer.validated_data.get("is_unsafe", False):
sampaccoud marked this conversation as resolved.
Show resolved Hide resolved
metadata["Metadata"]["is_unsafe"] = "true"

file = serializer.validated_data["file"]
default_storage.connection.meta.client.upload_fileobj(
file, default_storage.bucket_name, key, ExtraArgs=metadata
)

default_storage.save(key, file)
return drf_response.Response(
{"file": f"{settings.MEDIA_URL:s}{key:s}"}, status=status.HTTP_201_CREATED
)
Expand Down
124 changes: 100 additions & 24 deletions src/backend/core/tests/documents/test_api_documents_attachment_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import re
import uuid

from django.core.files.base import ContentFile
from django.core.files.storage import default_storage
from django.core.files.uploadedfile import SimpleUploadedFile

import pytest
Expand All @@ -16,6 +16,12 @@

pytestmark = pytest.mark.django_db

PIXEL = (
b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x06\x00"
b"\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\xf8\xff\xff?\x00\x05\xfe\x02\xfe"
b"\xa7V\xbd\xfa\x00\x00\x00\x00IEND\xaeB`\x82"
)


@pytest.mark.parametrize(
"reach, role",
Expand All @@ -33,7 +39,7 @@ def test_api_documents_attachment_upload_anonymous_forbidden(reach, role):
and role don't allow it.
"""
document = factories.DocumentFactory(link_reach=reach, link_role=role)
file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")

url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = APIClient().post(url, {"file": file}, format="multipart")
Expand All @@ -50,14 +56,14 @@ def test_api_documents_attachment_upload_anonymous_success():
if the link reach and role permit it.
"""
document = factories.DocumentFactory(link_reach="public", link_role="editor")
file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")

url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = APIClient().post(url, {"file": file}, format="multipart")

assert response.status_code == 201

pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.jpg")
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.png")
match = pattern.search(response.json()["file"])
file_id = match.group(1)

Expand Down Expand Up @@ -85,7 +91,7 @@ def test_api_documents_attachment_upload_authenticated_forbidden(reach, role):
client.force_login(user)

document = factories.DocumentFactory(link_reach=reach, link_role=role)
file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")

url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = client.post(url, {"file": file}, format="multipart")
Expand Down Expand Up @@ -114,14 +120,14 @@ def test_api_documents_attachment_upload_authenticated_success(reach, role):
client.force_login(user)

document = factories.DocumentFactory(link_reach=reach, link_role=role)
file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")

url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = client.post(url, {"file": file}, format="multipart")

assert response.status_code == 201

pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.jpg")
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.png")
match = pattern.search(response.json()["file"])
file_id = match.group(1)

Expand All @@ -148,7 +154,7 @@ def test_api_documents_attachment_upload_reader(via, mock_user_teams):
document=document, team="lasuite", role="reader"
)

file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")

url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = client.post(url, {"file": file}, format="multipart")
Expand Down Expand Up @@ -179,20 +185,28 @@ def test_api_documents_attachment_upload_success(via, role, mock_user_teams):
document=document, team="lasuite", role=role
)

file = SimpleUploadedFile("test_file.jpg", b"Dummy content")
file = SimpleUploadedFile(name="test.png", content=PIXEL, content_type="image/png")

url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"
response = client.post(url, {"file": file}, format="multipart")

assert response.status_code == 201

pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.jpg")
match = pattern.search(response.json()["file"])
file_path = response.json()["file"]
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.png")
match = pattern.search(file_path)
file_id = match.group(1)

# Validate that file_id is a valid UUID
uuid.UUID(file_id)

# Now, check the metadata of the uploaded file
key = file_path.replace("/media", "")
file_head = default_storage.connection.meta.client.head_object(
Bucket=default_storage.bucket_name, Key=key
)
assert file_head["Metadata"] == {"owner": str(user.id)}


def test_api_documents_attachment_upload_invalid(client):
"""Attempt to upload without a file should return an explicit error."""
Expand Down Expand Up @@ -222,34 +236,96 @@ def test_api_documents_attachment_upload_size_limit_exceeded(settings):
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"

# Create a temporary file larger than the allowed size
content = b"a" * (1048576 + 1)
file = ContentFile(content, name="test.jpg")
file = SimpleUploadedFile(
name="test.txt", content=b"a" * (1048576 + 1), content_type="text/plain"
)

response = client.post(url, {"file": file}, format="multipart")

assert response.status_code == 400
assert response.json() == {"file": ["File size exceeds the maximum limit of 1 MB."]}


def test_api_documents_attachment_upload_type_not_allowed(settings):
"""The uploaded file should be of a whitelisted type."""
settings.DOCUMENT_IMAGE_ALLOWED_MIME_TYPES = ["image/jpeg", "image/png"]

@pytest.mark.parametrize(
"name,content,extension",
[
("test.exe", b"text", "txt"),
("test", b"text", "txt"),
("test.aaaaaa", b"test", "txt"),
("test.txt", PIXEL, "png"),
("test.py", b"#!/usr/bin/python", "txt"),
],
)
def test_api_documents_attachment_upload_fix_extension(name, content, extension):
"""
A file with no extension or a wrong extension is accepted and the extension
is corrected in storage.
"""
user = factories.UserFactory()
client = APIClient()
client.force_login(user)

document = factories.DocumentFactory(users=[(user, "owner")])
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"

# Create a temporary file with a not allowed type (e.g., text file)
file = ContentFile(b"a" * 1048576, name="test.txt")
file = SimpleUploadedFile(name=name, content=content)
response = client.post(url, {"file": file}, format="multipart")

assert response.status_code == 201

# The .png extension was added to the file name in storage
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.{extension:s}")
match = pattern.search(response.json()["file"])
file_id = match.group(1)

# Validate that file_id is a valid UUID
uuid.UUID(file_id)


def test_api_documents_attachment_upload_empty_file():
"""An empty file should be rejected."""
user = factories.UserFactory()
client = APIClient()
client.force_login(user)

document = factories.DocumentFactory(users=[(user, "owner")])
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"

file = SimpleUploadedFile(name="test.png", content=b"")
response = client.post(url, {"file": file}, format="multipart")

assert response.status_code == 400
assert response.json() == {
"file": [
"File type 'text/plain' is not allowed. Allowed types are: image/jpeg, image/png"
]
}
assert response.json() == {"file": ["The submitted file is empty."]}


def test_api_documents_attachment_upload_unsafe():
"""A file with an unsafe mime type should be tagged as such."""
user = factories.UserFactory()
client = APIClient()
client.force_login(user)

document = factories.DocumentFactory(users=[(user, "owner")])
url = f"/api/v1.0/documents/{document.id!s}/attachment-upload/"

file = SimpleUploadedFile(
name="script.exe", content=b"\x4d\x5a\x90\x00\x03\x00\x00\x00"
)
response = client.post(url, {"file": file}, format="multipart")

assert response.status_code == 201

# The .png extension was added to the file name in storage
file_path = response.json()["file"]
pattern = re.compile(rf"^/media/{document.id!s}/attachments/(.*)\.exe")
match = pattern.search(file_path)
file_id = match.group(1)

# Validate that file_id is a valid UUID
uuid.UUID(file_id)

# Now, check the metadata of the uploaded file
key = file_path.replace("/media", "")
file_head = default_storage.connection.meta.client.head_object(
Bucket=default_storage.bucket_name, Key=key
)
assert file_head["Metadata"] == {"owner": str(user.id), "is_unsafe": "true"}
Loading
Loading